diff options
1037 files changed, 36317 insertions, 14377 deletions
diff --git a/Documentation/devicetree/bindings/net/brcm,amac.txt b/Documentation/devicetree/bindings/net/brcm,amac.txt index ba5ecc1041a5..2fefa1a44afd 100644 --- a/Documentation/devicetree/bindings/net/brcm,amac.txt +++ b/Documentation/devicetree/bindings/net/brcm,amac.txt @@ -2,11 +2,17 @@ Broadcom AMAC Ethernet Controller Device Tree Bindings ------------------------------------------------------------- Required properties: - - compatible: "brcm,amac" or "brcm,nsp-amac" - - reg: Address and length of the GMAC registers, - Address and length of the GMAC IDM registers - - reg-names: Names of the registers. Must have both "amac_base" and - "idm_base" + - compatible: "brcm,amac" + "brcm,nsp-amac" + "brcm,ns2-amac" + - reg: Address and length of the register set for the device. It + contains the information of registers in the same order as + described by reg-names + - reg-names: Names of the registers. + "amac_base": Address and length of the GMAC registers + "idm_base": Address and length of the GMAC IDM registers + "nicpm_base": Address and length of the NIC Port Manager + registers (required for Northstar2) - interrupts: Interrupt number Optional properties: diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 7629189398aa..32025eb4b31b 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -20,16 +20,35 @@ Required properties: Optional properties: - reset-gpios : Should be a gpio specifier for a reset line - +- interrupt-parent : Parent interrupt controller +- interrupts : Interrupt from the switch +- interrupt-controller : Indicates the switch is itself an interrupt + controller. This is used for the PHY interrupts. +#interrupt-cells = <2> : Controller uses two cells, number and flag +- mdio : container of PHY and devices on the switches MDIO + bus Example: mdio { #address-cells = <1>; #size-cells = <0>; + interrupt-parent = <&gpio0>; + interrupts = <27 IRQ_TYPE_LEVEL_LOW>; + interrupt-controller; + #interrupt-cells = <2>; switch0: switch@0 { compatible = "marvell,mv88e6085"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; }; + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; + }; }; diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt index d4b7f2e49984..abfbeecbcf39 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt @@ -45,6 +45,12 @@ Required properties: depends on the hardware user manual. - port-mode-offset: is offset of port mode field for each port in dsaf. Its value depends on the hardware user manual. +- mc-mac-mask: mask of multicast address, determines bit in multicast address + to set: + 1 stands for this bit will be precisely matched, TCAM will check this bit of + MAC address. + 0 stands for this bit will be fuzzy matched, TCAM won't care about this bit + of MAC address. [1] Documentation/devicetree/bindings/net/phy.txt @@ -74,10 +80,12 @@ dsaf0: dsa@c7000000 { reg = 0; phy-handle = <&phy0>; serdes-syscon = <&serdes>; + mc-mac-mask = [ff f0 00 00 00 00]; }; port@1 { reg = 1; serdes-syscon = <&serdes>; + mc-mac-mask = [ff f0 00 00 00 00]; }; }; diff --git a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt index 8516929c7251..065e8bdb957d 100644 --- a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt +++ b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt @@ -3,7 +3,7 @@ Properties for an MDIO bus multiplexer controlled by a memory-mapped device This is a special case of a MDIO bus multiplexer. A memory-mapped device, like an FPGA, is used to control which child bus is connected. The mdio-mux node must be a child of the memory-mapped device. The driver currently only -supports devices with eight-bit registers. +supports devices with 8, 16 or 32-bit registers. Required properties in addition to the generic multiplexer properties: @@ -11,7 +11,7 @@ Required properties in addition to the generic multiplexer properties: - reg : integer, contains the offset of the register that controls the bus multiplexer. The size field in the 'reg' property is the size of - register, and must therefore be 1. + register, and must therefore be 1, 2, or 4. - mux-mask : integer, contains an eight-bit mask that specifies which bits in the register control the actual bus multiplexer. The diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt index 99c7eb0a00c8..bdefefc66594 100644 --- a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt @@ -6,22 +6,27 @@ Required properties: Documentation/devicetree/bindings/net/phy.txt Optional properties: -- vsc8531,vddmac : The vddmac in mV. +- vsc8531,vddmac : The vddmac in mV. Allowed values is listed + in the first row of Table 1 (below). + This property is only used in combination + with the 'edge-slowdown' property. + Default value is 3300. - vsc8531,edge-slowdown : % the edge should be slowed down relative to - the fastest possible edge time. Native sign - need not enter. + the fastest possible edge time. Edge rate sets the drive strength of the MAC - interface output signals. Changing the drive - strength will affect the edge rate of the output - signal. The goal of this setting is to help - reduce electrical emission (EMI) by being able - to reprogram drive strength and in effect slow - down the edge rate if desired. Table 1 shows the - impact to the edge rate per VDDMAC supply for each - drive strength setting. - Ref: Table:1 - Edge rate change below. - -Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values + interface output signals. Changing the + drive strength will affect the edge rate of + the output signal. The goal of this setting + is to help reduce electrical emission (EMI) + by being able to reprogram drive strength + and in effect slow down the edge rate if + desired. + To adjust the edge-slowdown, the 'vddmac' + must be specified. Table 1 lists the + supported edge-slowdown values for a given + 'vddmac'. + Default value is 0%. + Ref: Table:1 - Edge rate change (below). Table: 1 - Edge rate change ----------------------------------------------------------------| @@ -29,23 +34,23 @@ Table: 1 - Edge rate change | | | 3300 mV 2500 mV 1800 mV 1500 mV | |---------------------------------------------------------------| -| Default Deafult Default Default | +| 0% 0% 0% 0% | | (Fastest) (recommended) (recommended) | |---------------------------------------------------------------| -| -2% -3% -5% -6% | +| 2% 3% 5% 6% | |---------------------------------------------------------------| -| -4% -6% -9% -14% | +| 4% 6% 9% 14% | |---------------------------------------------------------------| -| -7% -10% -16% -21% | +| 7% 10% 16% 21% | |(recommended) (recommended) | |---------------------------------------------------------------| -| -10% -14% -23% -29% | +| 10% 14% 23% 29% | |---------------------------------------------------------------| -| -17% -23% -35% -42% | +| 17% 23% 35% 42% | |---------------------------------------------------------------| -| -29% -37% -52% -58% | +| 29% 37% 52% 58% | |---------------------------------------------------------------| -| -53% -63% -76% -77% | +| 53% 63% 76% 77% | | (slowest) | |---------------------------------------------------------------| @@ -54,5 +59,5 @@ Example: vsc8531_0: ethernet-phy@0 { compatible = "ethernet-phy-id0007.0570"; vsc8531,vddmac = <3300>; - vsc8531,edge-slowdown = <21>; + vsc8531,edge-slowdown = <7>; }; diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt new file mode 100644 index 000000000000..df0534e2eda1 --- /dev/null +++ b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt @@ -0,0 +1,39 @@ +* Oxford Semiconductor OXNAS DWMAC Ethernet controller + +The device inherits all the properties of the dwmac/stmmac devices +described in the file stmmac.txt in the current directory with the +following changes. + +Required properties on all platforms: + +- compatible: For the OX820 SoC, it should be : + - "oxsemi,ox820-dwmac" to select glue + - "snps,dwmac-3.512" to select IP version. + +- clocks: Should contain phandles to the following clocks +- clock-names: Should contain the following: + - "stmmaceth" for the host clock - see stmmac.txt + - "gmac" for the peripheral gate clock + +- oxsemi,sys-ctrl: a phandle to the system controller syscon node + +Example : + +etha: ethernet@40400000 { + compatible = "oxsemi,ox820-dwmac", "snps,dwmac-3.512"; + reg = <0x40400000 0x2000>; + interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; + mac-address = [000000000000]; /* Filled in by U-Boot */ + phy-mode = "rgmii"; + + clocks = <&stdclk CLK_820_ETHA>, <&gmacclk>; + clock-names = "gmac", "stmmaceth"; + resets = <&reset RESET_MAC>; + + /* Regmap for sys registers */ + oxsemi,sys-ctrl = <&sys>; + + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index bc1c3c8bf8fa..4627da3d52c4 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -35,6 +35,10 @@ Optional Properties: - broken-turn-around: If set, indicates the PHY device does not correctly release the turn around line low at the end of a MDIO transaction. +- enet-phy-lane-swap: If set, indicates the PHY will swap the TX/RX lanes to + compensate for the board being designed with the lanes swapped. + + Example: ethernet-phy@0 { diff --git a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt index e77e167593db..309e37eb7c7c 100644 --- a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt +++ b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt @@ -13,3 +13,5 @@ Optional properties: 16-bit access only. - power-gpios: GPIO to control the PWRDWN pin - reset-gpios: GPIO to control the RESET pin +- pxa-u16-align4 : Boolean, put in place the workaround the force all + u16 writes to be 32 bits aligned diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt index 5d21141a68b5..85bf945b898f 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.txt +++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt @@ -9,6 +9,18 @@ Required properties: - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h for applicable values +Optional property: + - ti,min-output-impedance - MAC Interface Impedance control to set + the programmable output impedance to + minimum value (35 ohms). + - ti,max-output-impedance - MAC Interface Impedance control to set + the programmable output impedance to + maximum value (70 ohms). + +Note: ti,min-output-impedance and ti,max-output-impedance are mutually + exclusive. When both properties are present ti,max-output-impedance + takes precedence. + Default child nodes are standard Ethernet PHY device nodes as described in Documentation/devicetree/bindings/net/phy.txt diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 8a8d3d96f6c6..ccf94677b240 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -32,7 +32,7 @@ compatible interfaces. Once found, it will create subfolders in the /sys directories of each supported interface, e.g. # ls /sys/class/net/eth0/batman_adv/ -# iface_status mesh_iface +# elp_interval iface_status mesh_iface throughput_override If an interface does not have the "batman_adv" subfolder it prob- ably is not supported. Not supported interfaces are: loopback, @@ -71,17 +71,19 @@ All mesh wide settings can be found in batman's own interface folder: # ls /sys/class/net/bat0/mesh/ -#aggregated_ogms distributed_arp_table gw_sel_class orig_interval -#ap_isolation fragmentation hop_penalty routing_algo -#bonding gw_bandwidth isolation_mark vlan0 -#bridge_loop_avoidance gw_mode log_level +# aggregated_ogms fragmentation isolation_mark routing_algo +# ap_isolation gw_bandwidth log_level vlan0 +# bonding gw_mode multicast_mode +# bridge_loop_avoidance gw_sel_class network_coding +# distributed_arp_table hop_penalty orig_interval There is a special folder for debugging information: # ls /sys/kernel/debug/batman_adv/bat0/ -# bla_backbone_table log transtable_global -# bla_claim_table originators transtable_local -# gateways socket +# bla_backbone_table log neighbors transtable_local +# bla_claim_table mcast_flags originators +# dat_cache nc socket +# gateways nc_nodes transtable_global Some of the files contain all sort of status information regard- ing the mesh network. For example, you can view the table of @@ -159,13 +161,16 @@ file in debugfs The additional debug output is by default disabled. It can be en- abled during run time. Following log_levels are defined: -0 - All debug output disabled -1 - Enable messages related to routing / flooding / broadcasting -2 - Enable messages related to route added / changed / deleted -4 - Enable messages related to translation table operations -8 - Enable messages related to bridge loop avoidance -16 - Enable messaged related to DAT, ARP snooping and parsing -31 - Enable all messages + 0 - All debug output disabled + 1 - Enable messages related to routing / flooding / broadcasting + 2 - Enable messages related to route added / changed / deleted + 4 - Enable messages related to translation table operations + 8 - Enable messages related to bridge loop avoidance + 16 - Enable messages related to DAT, ARP snooping and parsing + 32 - Enable messages related to network coding + 64 - Enable messages related to multicast +128 - Enable messages related to throughput meter +255 - Enable all messages The debug output can be changed at runtime using the file /sys/class/net/bat0/mesh/log_level. e.g. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3db8c67d2c8d..5af48dd7c5fc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -967,6 +967,21 @@ igmp_qrv - INTEGER Default: 2 (as specified by RFC2236 8.1) Minimum: 1 (as specified by RFC6636 4.5) +force_igmp_version - INTEGER + 0 - (default) No enforcement of a IGMP version, IGMPv1/v2 fallback + allowed. Will back to IGMPv3 mode again if all IGMPv1/v2 Querier + Present timer expires. + 1 - Enforce to use IGMP version 1. Will also reply IGMPv1 report if + receive IGMPv2/v3 query. + 2 - Enforce to use IGMP version 2. Will fallback to IGMPv1 if receive + IGMPv1 query message. Will reply report if receive IGMPv3 query. + 3 - Enforce to use IGMP version 3. The same react with default 0. + + Note: this is not the same with force_mld_version because IGMPv3 RFC3376 + Security Considerations does not have clear description that we could + ignore other version messages completely as MLDv2 RFC3810. So make + this value as default 0 is recommended. + conf/interface/* changes special settings per interface (where "interface" is the name of your network interface) diff --git a/Documentation/networking/mac80211_hwsim/README b/Documentation/networking/mac80211_hwsim/README index 24ac91d56698..3566a725d19c 100644 --- a/Documentation/networking/mac80211_hwsim/README +++ b/Documentation/networking/mac80211_hwsim/README @@ -60,7 +60,7 @@ modprobe mac80211_hwsim hostapd hostapd.conf # Run wpa_supplicant (station) for wlan1 -wpa_supplicant -Dwext -iwlan1 -c wpa_supplicant.conf +wpa_supplicant -Dnl80211 -iwlan1 -c wpa_supplicant.conf More test cases are available in hostap.git: diff --git a/Documentation/networking/seg6-sysctl.txt b/Documentation/networking/seg6-sysctl.txt new file mode 100644 index 000000000000..bdbde23b19cb --- /dev/null +++ b/Documentation/networking/seg6-sysctl.txt @@ -0,0 +1,18 @@ +/proc/sys/net/conf/<iface>/seg6_* variables: + +seg6_enabled - BOOL + Accept or drop SR-enabled IPv6 packets on this interface. + + Relevant packets are those with SRH present and DA = local. + + 0 - disabled (default) + not 0 - enabled + +seg6_require_hmac - INTEGER + Define HMAC policy for ingress SR-enabled packets on this interface. + + -1 - Ignore HMAC field + 0 - Accept SR packets without HMAC, validate SR packets with HMAC + 1 - Drop SR packets without HMAC, validate SR packets with HMAC + + Default is 0. diff --git a/MAINTAINERS b/MAINTAINERS index 2a58eeac9452..987081272e25 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2530,6 +2530,8 @@ L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported F: kernel/bpf/ +F: tools/testing/selftests/bpf/ +F: lib/test_bpf.c BROADCOM B44 10/100 ETHERNET DRIVER M: Michael Chan <michael.chan@broadcom.com> @@ -8448,7 +8450,6 @@ F: include/uapi/linux/net_namespace.h F: tools/net/ F: tools/testing/selftests/net/ F: lib/random32.c -F: lib/test_bpf.c NETWORKING [IPv4/IPv6] M: "David S. Miller" <davem@davemloft.net> diff --git a/arch/arm/boot/dts/dra72-evm-revc.dts b/arch/arm/boot/dts/dra72-evm-revc.dts index 064b322a7a04..3b23b32e1b30 100644 --- a/arch/arm/boot/dts/dra72-evm-revc.dts +++ b/arch/arm/boot/dts/dra72-evm-revc.dts @@ -59,15 +59,17 @@ &davinci_mdio { dp83867_0: ethernet-phy@2 { reg = <2>; - ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>; - ti,tx-internal-delay = <DP83867_RGMIIDCTL_1_NS>; + ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; + ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; + ti,min-output-impedance; }; dp83867_1: ethernet-phy@3 { reg = <3>; - ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>; - ti,tx-internal-delay = <DP83867_RGMIIDCTL_1_NS>; + ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; + ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; + ti,min-output-imepdance; }; }; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index 5c1fcab4a6f7..1552db00cc59 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -88,10 +88,16 @@ switch0: switch0@0 { compatible = "marvell,mv88e6085"; + pinctrl-0 = <&pinctrl_gpio_switch0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; reg = <0>; dsa,member = <0 0>; + interrupt-parent = <&gpio0>; + interrupts = <27 IRQ_TYPE_LEVEL_LOW>; + interrupt-controller; + #interrupt-cells = <2>; ports { #address-cells = <1>; @@ -99,16 +105,19 @@ port@0 { reg = <0>; label = "lan0"; + phy-handle = <&switch0phy0>; }; port@1 { reg = <1>; label = "lan1"; + phy-handle = <&switch0phy1>; }; port@2 { reg = <2>; label = "lan2"; + phy-handle = <&switch0phy2>; }; switch0port5: port@5 { @@ -133,6 +142,24 @@ }; }; }; + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch0phy0: switch0phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; + switch0phy1: switch1phy0@1 { + reg = <1>; + interrupt-parent = <&switch0>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; }; + switch0phy2: switch1phy0@2 { + reg = <2>; + interrupt-parent = <&switch0>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + }; + }; }; }; @@ -143,10 +170,16 @@ switch1: switch1@0 { compatible = "marvell,mv88e6085"; + pinctrl-0 = <&pinctrl_gpio_switch1>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; reg = <0>; dsa,member = <0 1>; + interrupt-parent = <&gpio0>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; + interrupt-controller; + #interrupt-cells = <2>; ports { #address-cells = <1>; @@ -196,12 +229,18 @@ #size-cells = <0>; switch1phy0: switch1phy0@0 { reg = <0>; + interrupt-parent = <&switch1>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; }; switch1phy1: switch1phy0@1 { reg = <1>; + interrupt-parent = <&switch1>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; }; switch1phy2: switch1phy0@2 { reg = <2>; + interrupt-parent = <&switch1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; }; }; }; @@ -636,6 +675,18 @@ >; }; + pinctrl_gpio_switch0: pinctrl-gpio-switch0 { + fsl,pins = < + VF610_PAD_PTB5__GPIO_27 0x219d + >; + }; + + pinctrl_gpio_switch1: pinctrl-gpio-switch1 { + fsl,pins = < + VF610_PAD_PTB4__GPIO_26 0x219d + >; + }; + pinctrl_i2c_mux_reset: pinctrl-i2c-mux-reset { fsl,pins = < VF610_PAD_PTE14__GPIO_119 0x31c2 diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index 66070acaa888..d1db32b1a2c6 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -85,6 +85,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata smc91x_platdata = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 40964069a17c..a2d851a3a546 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -140,6 +140,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata mainstone_smc91x_info = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 702f4f14b708..7b6610e9dae4 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -673,6 +673,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata stargate2_smc91x_info = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index b09f3bc5c6c1..c4d544244b19 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -56,6 +56,10 @@ }; }; +&enet { + status = "ok"; +}; + &pci_phy0 { status = "ok"; }; @@ -174,6 +178,7 @@ &mdio_mux_iproc { mdio@10 { gphy0: eth-phy@10 { + enet-phy-lane-swap; reg = <0x10>; }; }; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index d95dc408629a..773ed593da4d 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -191,6 +191,18 @@ #include "ns2-clock.dtsi" + enet: ethernet@61000000 { + compatible = "brcm,ns2-amac"; + reg = <0x61000000 0x1000>, + <0x61090000 0x1000>, + <0x61030000 0x100>; + reg-names = "amac_base", "idm_base", "nicpm_base"; + interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>; + phy-handle = <&gphy0>; + phy-mode = "rgmii"; + status = "disabled"; + }; + dma0: dma@61360000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x61360000 0x1000>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index dab2cb0c1f1c..6be08113a96d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -257,6 +257,7 @@ CONFIG_GPIO_DWAPB=y CONFIG_GPIO_PL061=y CONFIG_GPIO_RCAR=y CONFIG_GPIO_XGENE=y +CONFIG_GPIO_XGENE_SB=y CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCA953X_IRQ=y CONFIG_GPIO_MAX77620=y diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index a0985fd088d1..fc4be028c418 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -184,7 +184,6 @@ static const struct net_device_ops nfeth_netdev_ops = { .ndo_start_xmit = nfeth_xmit, .ndo_tx_timeout = nfeth_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/arch/mn10300/unit-asb2303/include/unit/smc91111.h b/arch/mn10300/unit-asb2303/include/unit/smc91111.h index dd456e9c513f..dd4e2946438e 100644 --- a/arch/mn10300/unit-asb2303/include/unit/smc91111.h +++ b/arch/mn10300/unit-asb2303/include/unit/smc91111.h @@ -30,7 +30,7 @@ #if SMC_CAN_USE_16BIT #define SMC_inw(a, r) inw((unsigned long) ((a) + (r))) -#define SMC_outw(v, a, r) outw(v, (unsigned long) ((a) + (r))) +#define SMC_outw(lp, v, a, r) outw(v, (unsigned long) ((a) + (r))) #define SMC_insw(a, r, p, l) insw((unsigned long) ((a) + (r)), (p), (l)) #define SMC_outsw(a, r, p, l) outsw((unsigned long) ((a) + (r)), (p), (l)) #endif diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 2cd5b6874c7b..1669240c7a25 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -256,13 +256,6 @@ static void uml_net_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int uml_net_change_mtu(struct net_device *dev, int new_mtu) -{ - dev->mtu = new_mtu; - - return 0; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void uml_net_poll_controller(struct net_device *dev) { @@ -374,7 +367,6 @@ static const struct net_device_ops uml_netdev_ops = { .ndo_set_rx_mode = uml_net_set_multicast_list, .ndo_tx_timeout = uml_net_tx_timeout, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = uml_net_change_mtu, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = uml_net_poll_controller, diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 80a0f1a78551..8948392c0525 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -132,28 +132,27 @@ static void aead_wmem_wakeup(struct sock *sk) static int aead_wait_for_data(struct sock *sk, unsigned flags) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct alg_sock *ask = alg_sk(sk); struct aead_ctx *ctx = ask->private; long timeout; - DEFINE_WAIT(wait); int err = -ERESTARTSYS; if (flags & MSG_DONTWAIT) return -EAGAIN; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, !ctx->more)) { + if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 28556fce4267..1e38aaa8303e 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -199,26 +199,26 @@ static void skcipher_free_sgl(struct sock *sk) static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) { - long timeout; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = -ERESTARTSYS; + long timeout; if (flags & MSG_DONTWAIT) return -EAGAIN; sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) { + if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } @@ -242,10 +242,10 @@ static void skcipher_wmem_wakeup(struct sock *sk) static int skcipher_wait_for_data(struct sock *sk, unsigned flags) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; long timeout; - DEFINE_WAIT(wait); int err = -ERESTARTSYS; if (flags & MSG_DONTWAIT) { @@ -254,17 +254,17 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, ctx->used)) { + if (sk_wait_event(sk, &timeout, ctx->used, &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index e24ea4e796e4..7fceb3b4691b 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -82,8 +82,8 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { { .name = ACPI_GENL_MCAST_GROUP_NAME, }, }; -static struct genl_family acpi_event_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family acpi_event_genl_family __ro_after_init = { + .module = THIS_MODULE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, .maxattr = ACPI_GENL_ATTR_MAX, @@ -144,7 +144,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, EXPORT_SYMBOL(acpi_bus_generate_netlink_event); -static int acpi_event_genetlink_init(void) +static int __init acpi_event_genetlink_init(void) { return genl_register_family(&acpi_event_genl_family); } diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 6ac2b2b1e8de..5ad037c07ec7 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -584,7 +584,7 @@ static ssize_t hardware_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", data32); } -static DEVICE_ATTR(console, 0644, console_show, console_store); +static DEVICE_ATTR_RW(console); #define SOLOS_ATTR_RO(x) static DEVICE_ATTR(x, 0444, solos_param_show, NULL); diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index d28922df01d7..a7dd5f4f2c5a 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4248,7 +4248,6 @@ static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 15475892af0c..5d3640264f2d 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1368,15 +1368,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -static int fwnet_change_mtu(struct net_device *net, int new_mtu) -{ - if (new_mtu < 68) - return -EINVAL; - - net->mtu = new_mtu; - return 0; -} - static const struct ethtool_ops fwnet_ethtool_ops = { .get_link = ethtool_op_get_link, }; @@ -1385,7 +1376,6 @@ static const struct net_device_ops fwnet_netdev_ops = { .ndo_open = fwnet_open, .ndo_stop = fwnet_stop, .ndo_start_xmit = fwnet_tx, - .ndo_change_mtu = fwnet_change_mtu, }; static void fwnet_init_dev(struct net_device *net) @@ -1454,7 +1444,6 @@ static int fwnet_probe(struct fw_unit *unit, struct net_device *net; bool allocated_netdev = false; struct fwnet_device *dev; - unsigned max_mtu; int ret; union fwnet_hwaddr *ha; @@ -1493,13 +1482,9 @@ static int fwnet_probe(struct fw_unit *unit, goto out; dev->local_fifo = dev->handler.offset; - /* - * Use the RFC 2734 default 1500 octets or the maximum payload - * as initial MTU - */ - max_mtu = (1 << (card->max_receive + 1)) - - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE; - net->mtu = min(1500U, max_mtu); + net->mtu = 1500U; + net->min_mtu = ETH_MIN_MTU; + net->max_mtu = 0xfff; /* Set our hardware address while we're at it */ ha = (union fwnet_hwaddr *)net->dev_addr; diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 6031cd146556..7ef819680acd 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -960,15 +960,6 @@ static int ssip_pn_stop(struct net_device *dev) return 0; } -static int ssip_pn_set_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu > SSIP_MAX_MTU || new_mtu < PHONET_MIN_MTU) - return -EINVAL; - dev->mtu = new_mtu; - - return 0; -} - static void ssip_xmit_work(struct work_struct *work) { struct ssi_protocol *ssi = @@ -1060,7 +1051,6 @@ static const struct net_device_ops ssip_pn_ops = { .ndo_open = ssip_pn_open, .ndo_stop = ssip_pn_stop, .ndo_start_xmit = ssip_pn_xmit, - .ndo_change_mtu = ssip_pn_set_mtu, }; static void ssip_pn_setup(struct net_device *dev) @@ -1136,6 +1126,10 @@ static int ssi_protocol_probe(struct device *dev) goto out1; } + /* MTU range: 6 - 65535 */ + ssi->netdev->min_mtu = PHONET_MIN_MTU; + ssi->netdev->max_mtu = SSIP_MAX_MTU; + SET_NETDEV_DEV(ssi->netdev, dev); netif_carrier_off(ssi->netdev); err = register_netdev(ssi->netdev); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 19d499dcab76..0c0bea091de8 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -127,14 +127,7 @@ void ib_cache_release_one(struct ib_device *device); static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, struct net_device *upper) { - struct net_device *_upper = NULL; - struct list_head *iter; - - netdev_for_each_all_upper_dev_rcu(dev, _upper, iter) - if (_upper == upper) - break; - - return _upper == upper; + return netdev_has_upper_dev_all_rcu(dev, upper); } int addr_init(void); diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 06556c34606d..3a64a0881882 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -437,6 +437,28 @@ static void callback_for_addr_gid_device_scan(struct ib_device *device, &parsed->gid_attr); } +struct upper_list { + struct list_head list; + struct net_device *upper; +}; + +static int netdev_upper_walk(struct net_device *upper, void *data) +{ + struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct list_head *upper_list = data; + + if (!entry) { + pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); + return 0; + } + + list_add_tail(&entry->list, upper_list); + dev_hold(upper); + entry->upper = upper; + + return 0; +} + static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, void *cookie, void (*handle_netdev)(struct ib_device *ib_dev, @@ -444,30 +466,12 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, struct net_device *ndev)) { struct net_device *ndev = (struct net_device *)cookie; - struct upper_list { - struct list_head list; - struct net_device *upper; - }; - struct net_device *upper; - struct list_head *iter; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); rcu_read_lock(); - netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) { - struct upper_list *entry = kmalloc(sizeof(*entry), - GFP_ATOMIC); - - if (!entry) { - pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); - continue; - } - - list_add_tail(&entry->list, &upper_list); - dev_hold(upper); - entry->upper = upper; - } + netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 63036c731626..76ed57f1b678 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1771,13 +1771,13 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mutex_lock(&dev->flow_db.lock); list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rule(iter->rule); + mlx5_del_flow_rules(iter->rule); put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); @@ -1857,7 +1857,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, num_groups, - 0); + 0, 0); if (!IS_ERR(ft)) { prio->refcount = 0; @@ -1877,10 +1877,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; - u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1905,12 +1905,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - handler->rule = mlx5_add_flow_rule(ft, spec, - action, - MLX5_FS_DEFAULT_FLOW_TAG, - dst); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, + dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -1941,7 +1941,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de handler_dst = create_flow_rule(dev, ft_prio, flow_attr, dst); if (IS_ERR(handler_dst)) { - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_dst; @@ -2004,7 +2004,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_ucast; @@ -2046,7 +2046,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, return handler_rx; err_tx: - mlx5_del_flow_rule(handler_rx->rule); + mlx5_del_flow_rules(handler_rx->rule); ft_rx->refcount--; kfree(handler_rx); err: @@ -2358,6 +2358,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibev.event = IB_EVENT_CLIENT_REREGISTER; port = (u8)param; break; + default: + return; } ibev.device = &ibdev->ib_dev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dcdcd195fe53..d5d007740159 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -153,7 +153,7 @@ struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; }; struct mlx5_ib_flow_db { diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 35cbb17bec12..2baa45a8e401 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -65,7 +65,6 @@ MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -int max_mtu = 9000; int interrupt_mod_interval = 0; /* Interoperability */ diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index e7430c9254d3..85acd0843b50 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -83,6 +83,8 @@ #define NES_FIRST_QPN 64 #define NES_SW_CONTEXT_ALIGN 1024 +#define NES_MAX_MTU 9000 + #define NES_NIC_MAX_NICS 16 #define NES_MAX_ARP_TABLE_SIZE 4096 @@ -169,8 +171,6 @@ do { \ #include "nes_cm.h" #include "nes_mgt.h" -extern int max_mtu; -#define max_frame_len (max_mtu+ETH_HLEN) extern int interrupt_mod_interval; extern int nes_if_count; extern int mpa_version; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2b27d1351cf7..7f8597d6738b 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -981,20 +981,16 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - int ret = 0; u8 jumbomode = 0; u32 nic_active; u32 nic_active_bit; u32 uc_all_active; u32 mc_all_active; - if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) - return -EINVAL; - netdev->mtu = new_mtu; nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; - if (netdev->mtu > 1500) { + if (netdev->mtu > ETH_DATA_LEN) { jumbomode=1; } nes_nic_init_timer_defaults(nesdev, jumbomode); @@ -1020,7 +1016,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } - return ret; + return 0; } @@ -1658,7 +1654,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->watchdog_timeo = NES_TX_TIMEOUT; netdev->irq = nesdev->pcidev->irq; - netdev->mtu = ETH_DATA_LEN; + netdev->max_mtu = NES_MAX_MTU; netdev->hard_header_len = ETH_HLEN; netdev->addr_len = ETH_ALEN; netdev->type = ARPHRD_ETHER; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b58d9dca5c93..c50794fb92db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -292,6 +292,25 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) return dev; } +struct ipoib_walk_data { + const struct sockaddr *addr; + struct net_device *result; +}; + +static int ipoib_upper_walk(struct net_device *upper, void *_data) +{ + struct ipoib_walk_data *data = _data; + int ret = 0; + + if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) { + dev_hold(upper); + data->result = upper; + ret = 1; + } + + return ret; +} + /** * Find a net_device matching the given address, which is an upper device of * the given net_device. @@ -304,27 +323,21 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) static struct net_device *ipoib_get_net_dev_match_addr( const struct sockaddr *addr, struct net_device *dev) { - struct net_device *upper, - *result = NULL; - struct list_head *iter; + struct ipoib_walk_data data = { + .addr = addr, + }; rcu_read_lock(); if (ipoib_is_dev_match_addr_rcu(addr, dev)) { dev_hold(dev); - result = dev; + data.result = dev; goto out; } - netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { - if (ipoib_is_dev_match_addr_rcu(addr, upper)) { - dev_hold(upper); - result = upper; - break; - } - } + netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data); out: rcu_read_unlock(); - return result; + return data.result; } /* returns the number of IPoIB netdevs on top a given ipoib device matching a @@ -2016,6 +2029,7 @@ static struct net_device *ipoib_add_port(const char *format, /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + priv->dev->max_mtu = IPOIB_CM_MTU; priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c index 5609deee7cd3..b93a4e9a8d34 100644 --- a/drivers/isdn/hysdn/hysdn_net.c +++ b/drivers/isdn/hysdn/hysdn_net.c @@ -232,7 +232,6 @@ static const struct net_device_ops hysdn_netdev_ops = { .ndo_open = net_open, .ndo_stop = net_close, .ndo_start_xmit = net_send_packet, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 9914f69a4a02..0da622f5fe69 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -1198,7 +1198,6 @@ static const struct net_device_ops dvb_netdev_ops = { .ndo_start_xmit = dvb_net_tx, .ndo_set_rx_mode = dvb_net_set_multicast_list, .ndo_set_mac_address = dvb_net_set_mac, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -1209,6 +1208,7 @@ static void dvb_net_setup(struct net_device *dev) dev->header_ops = &dvb_header_ops; dev->netdev_ops = &dvb_netdev_ops; dev->mtu = 4096; + dev->max_mtu = 4096; dev->flags |= IFF_NOARP; } diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index 6955c9e22d57..55dd71bbdc2a 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -549,16 +549,6 @@ mpt_lan_close(struct net_device *dev) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -static int -mpt_lan_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < MPT_LAN_MIN_MTU) || (new_mtu > MPT_LAN_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - -/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Tx timeout handler. */ static void mpt_lan_tx_timeout(struct net_device *dev) @@ -1304,7 +1294,6 @@ static const struct net_device_ops mpt_netdev_ops = { .ndo_open = mpt_lan_open, .ndo_stop = mpt_lan_close, .ndo_start_xmit = mpt_lan_sdu_send, - .ndo_change_mtu = mpt_lan_change_mtu, .ndo_tx_timeout = mpt_lan_tx_timeout, }; @@ -1375,6 +1364,10 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum) dev->netdev_ops = &mpt_netdev_ops; dev->watchdog_timeo = MPT_LAN_TX_TIMEOUT; + /* MTU range: 96 - 65280 */ + dev->min_mtu = MPT_LAN_MIN_MTU; + dev->max_mtu = MPT_LAN_MAX_MTU; + dlprintk((KERN_INFO MYNAM ": Finished registering dev " "and setting initial values\n")); diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 557f9782c53c..0c26eaf5f62b 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -118,6 +118,8 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock); * now, the default is 64KB. */ #define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 68 comes from min TCP+IP+MAC header */ +#define XPNET_MIN_MTU 68 /* 32KB has been determined to be the ideal */ #define XPNET_DEF_MTU (0x8000UL) @@ -330,22 +332,6 @@ xpnet_dev_stop(struct net_device *dev) return 0; } -static int -xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - /* 68 comes from min TCP+IP+MAC header */ - if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { - dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " - "between 68 and %ld\n", dev->name, new_mtu, - XPNET_MAX_MTU); - return -EINVAL; - } - - dev->mtu = new_mtu; - dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); - return 0; -} - /* * Notification that the other end has received the message and * DMA'd the skb information. At this point, they are done with @@ -519,7 +505,6 @@ static const struct net_device_ops xpnet_netdev_ops = { .ndo_open = xpnet_dev_open, .ndo_stop = xpnet_dev_stop, .ndo_start_xmit = xpnet_dev_hard_start_xmit, - .ndo_change_mtu = xpnet_dev_change_mtu, .ndo_tx_timeout = xpnet_dev_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -555,6 +540,8 @@ xpnet_init(void) xpnet_device->netdev_ops = &xpnet_netdev_ops; xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->min_mtu = XPNET_MIN_MTU; + xpnet_device->max_mtu = XPNET_MAX_MTU; /* * Multicast assumes the LSB of the first octet is set for multicast diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index e90c6a7333d7..31f89f1c6123 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -59,7 +59,6 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static const struct net_device_ops ipddp_netdev_ops = { .ndo_start_xmit = ipddp_xmit, .ndo_do_ioctl = ipddp_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 551f0f8dead3..c80b023092dd 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -950,13 +950,61 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], dev_queue_xmit(skb); } +struct alb_walk_data { + struct bonding *bond; + struct slave *slave; + u8 *mac_addr; + bool strict_match; +}; + +static int alb_upper_dev_walk(struct net_device *upper, void *_data) +{ + struct alb_walk_data *data = _data; + bool strict_match = data->strict_match; + struct bonding *bond = data->bond; + struct slave *slave = data->slave; + u8 *mac_addr = data->mac_addr; + struct bond_vlan_tag *tags; + + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { + if (strict_match && + ether_addr_equal_64bits(mac_addr, + upper->dev_addr)) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else if (!strict_match) { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } + } + + /* If this is a macvlan device, then only send updates + * when strict_match is turned off. + */ + if (netif_is_macvlan(upper) && !strict_match) { + tags = bond_verify_device_path(bond->dev, upper, 0); + if (IS_ERR_OR_NULL(tags)) + BUG(); + alb_send_lp_vid(slave, upper->dev_addr, + tags[0].vlan_proto, tags[0].vlan_id); + kfree(tags); + } + + return 0; +} + static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); - struct net_device *upper; - struct list_head *iter; - struct bond_vlan_tag *tags; + struct alb_walk_data data = { + .strict_match = strict_match, + .mac_addr = mac_addr, + .slave = slave, + .bond = bond, + }; /* send untagged */ alb_send_lp_vid(slave, mac_addr, 0, 0); @@ -965,33 +1013,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], * for that device. */ rcu_read_lock(); - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { - if (strict_match && - ether_addr_equal_64bits(mac_addr, - upper->dev_addr)) { - alb_send_lp_vid(slave, mac_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); - } else if (!strict_match) { - alb_send_lp_vid(slave, upper->dev_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); - } - } - - /* If this is a macvlan device, then only send updates - * when strict_match is turned off. - */ - if (netif_is_macvlan(upper) && !strict_match) { - tags = bond_verify_device_path(bond->dev, upper, 0); - if (IS_ERR_OR_NULL(tags)) - BUG(); - alb_send_lp_vid(slave, upper->dev_addr, - tags[0].vlan_proto, tags[0].vlan_id); - kfree(tags); - } - } + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data); rcu_read_unlock(); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5fa36ebc0640..5708f17e4cdf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2270,22 +2270,23 @@ re_arm: } } +static int bond_upper_dev_walk(struct net_device *upper, void *data) +{ + __be32 ip = *((__be32 *)data); + + return ip == bond_confirm_addr(upper, 0, ip); +} + static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { - struct net_device *upper; - struct list_head *iter; bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) return true; rcu_read_lock(); - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (ip == bond_confirm_addr(upper, 0, ip)) { - ret = true; - break; - } - } + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip)) + ret = true; rcu_read_unlock(); return ret; @@ -4079,16 +4080,16 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static int bond_ethtool_get_settings(struct net_device *bond_dev, - struct ethtool_cmd *ecmd) +static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev, + struct ethtool_link_ksettings *cmd) { struct bonding *bond = netdev_priv(bond_dev); unsigned long speed = 0; struct list_head *iter; struct slave *slave; - ecmd->duplex = DUPLEX_UNKNOWN; - ecmd->port = PORT_OTHER; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.port = PORT_OTHER; /* Since bond_slave_can_tx returns false for all inactive or down slaves, we * do not need to check mode. Though link speed might not represent @@ -4099,12 +4100,12 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev, if (bond_slave_can_tx(slave)) { if (slave->speed != SPEED_UNKNOWN) speed += slave->speed; - if (ecmd->duplex == DUPLEX_UNKNOWN && + if (cmd->base.duplex == DUPLEX_UNKNOWN && slave->duplex != DUPLEX_UNKNOWN) - ecmd->duplex = slave->duplex; + cmd->base.duplex = slave->duplex; } } - ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); + cmd->base.speed = speed ? : SPEED_UNKNOWN; return 0; } @@ -4120,8 +4121,8 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, static const struct ethtool_ops bond_ethtool_ops = { .get_drvinfo = bond_ethtool_get_drvinfo, - .get_settings = bond_ethtool_get_settings, .get_link = ethtool_op_get_link, + .get_link_ksettings = bond_ethtool_get_link_ksettings, }; static const struct net_device_ops bond_netdev_ops = { diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 221f5f011ff9..91c876a0a647 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -7,9 +7,6 @@ * */ - -#include <linux/module.h> - #include <linux/kernel.h> #include <linux/delay.h> #include <linux/types.h> @@ -264,7 +261,6 @@ static const struct net_device_ops e100_netdev_ops = { .ndo_do_ioctl = e100_ioctl, .ndo_set_mac_address = e100_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_config = e100_set_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = e100_netpoll, @@ -412,6 +408,7 @@ etrax_ethernet_init(void) led_next_time = jiffies; return 0; } +device_initcall(etrax_ethernet_init) /* set MAC address of the interface. called from the core after a * SIOCSIFADDR ioctl, and from the bootup above. @@ -1715,11 +1712,6 @@ e100_netpoll(struct net_device* netdev) } #endif -static int -etrax_init_module(void) -{ - return etrax_ethernet_init(); -} static int __init e100_boot_setup(char* str) @@ -1742,5 +1734,3 @@ e100_boot_setup(char* str) } __setup("etrax100_eth=", e100_boot_setup); - -module_init(etrax_init_module); diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 10ce820daa48..c36be318de1a 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o mv88e6xxx-objs += global1.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o +mv88e6xxx-objs += port.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 883fd9809dd2..d6d9d66b81ce 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -18,11 +18,15 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/if_bridge.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/jiffies.h> #include <linux/list.h> #include <linux/mdio.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/of_irq.h> #include <linux/of_mdio.h> #include <linux/netdevice.h> #include <linux/gpio/consumer.h> @@ -33,6 +37,7 @@ #include "mv88e6xxx.h" #include "global1.h" #include "global2.h" +#include "port.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -217,22 +222,6 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, - u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - return mv88e6xxx_read(chip, addr, reg, val); -} - -static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, - u16 val) -{ - int addr = chip->info->port_base_addr + port; - - return mv88e6xxx_write(chip, addr, reg, val); -} - static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -323,6 +312,164 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) reg, val); } +static void mv88e6xxx_g1_irq_mask(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int n = d->hwirq; + + chip->g1_irq.masked |= (1 << n); +} + +static void mv88e6xxx_g1_irq_unmask(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int n = d->hwirq; + + chip->g1_irq.masked &= ~(1 << n); +} + +static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id) +{ + struct mv88e6xxx_chip *chip = dev_id; + unsigned int nhandled = 0; + unsigned int sub_irq; + unsigned int n; + u16 reg; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, ®); + mutex_unlock(&chip->reg_lock); + + if (err) + goto out; + + for (n = 0; n < chip->g1_irq.nirqs; ++n) { + if (reg & (1 << n)) { + sub_irq = irq_find_mapping(chip->g1_irq.domain, n); + handle_nested_irq(sub_irq); + ++nhandled; + } + } +out: + return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); +} + +static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + + mutex_lock(&chip->reg_lock); +} + +static void mv88e6xxx_g1_irq_bus_sync_unlock(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + u16 mask = GENMASK(chip->g1_irq.nirqs, 0); + u16 reg; + int err; + + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, ®); + if (err) + goto out; + + reg &= ~mask; + reg |= (~chip->g1_irq.masked & mask); + + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg); + if (err) + goto out; + +out: + mutex_unlock(&chip->reg_lock); +} + +static struct irq_chip mv88e6xxx_g1_irq_chip = { + .name = "mv88e6xxx-g1", + .irq_mask = mv88e6xxx_g1_irq_mask, + .irq_unmask = mv88e6xxx_g1_irq_unmask, + .irq_bus_lock = mv88e6xxx_g1_irq_bus_lock, + .irq_bus_sync_unlock = mv88e6xxx_g1_irq_bus_sync_unlock, +}; + +static int mv88e6xxx_g1_irq_domain_map(struct irq_domain *d, + unsigned int irq, + irq_hw_number_t hwirq) +{ + struct mv88e6xxx_chip *chip = d->host_data; + + irq_set_chip_data(irq, d->host_data); + irq_set_chip_and_handler(irq, &chip->g1_irq.chip, handle_level_irq); + irq_set_noprobe(irq); + + return 0; +} + +static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = { + .map = mv88e6xxx_g1_irq_domain_map, + .xlate = irq_domain_xlate_twocell, +}; + +static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) +{ + int irq, virq; + + for (irq = 0; irq < 16; irq++) { + virq = irq_find_mapping(chip->g2_irq.domain, irq); + irq_dispose_mapping(virq); + } + + irq_domain_remove(chip->g2_irq.domain); +} + +static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip) +{ + int err, irq; + u16 reg; + + chip->g1_irq.nirqs = chip->info->g1_irqs; + chip->g1_irq.domain = irq_domain_add_simple( + NULL, chip->g1_irq.nirqs, 0, + &mv88e6xxx_g1_irq_domain_ops, chip); + if (!chip->g1_irq.domain) + return -ENOMEM; + + for (irq = 0; irq < chip->g1_irq.nirqs; irq++) + irq_create_mapping(chip->g1_irq.domain, irq); + + chip->g1_irq.chip = mv88e6xxx_g1_irq_chip; + chip->g1_irq.masked = ~0; + + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, ®); + if (err) + goto out; + + reg &= ~GENMASK(chip->g1_irq.nirqs, 0); + + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg); + if (err) + goto out; + + /* Reading the interrupt status clears (most of) them */ + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, ®); + if (err) + goto out; + + err = request_threaded_irq(chip->irq, NULL, + mv88e6xxx_g1_irq_thread_fn, + IRQF_ONESHOT | IRQF_TRIGGER_FALLING, + dev_name(chip->dev), chip); + if (err) + goto out; + + return 0; + +out: + mv88e6xxx_g1_irq_free(chip); + + return err; +} + int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { int i; @@ -477,9 +624,8 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip) { mutex_init(&chip->ppu_mutex); INIT_WORK(&chip->ppu_work, mv88e6xxx_ppu_reenable_work); - init_timer(&chip->ppu_timer); - chip->ppu_timer.data = (unsigned long)chip; - chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; + setup_timer(&chip->ppu_timer, mv88e6xxx_ppu_reenable_timer, + (unsigned long)chip); } static void mv88e6xxx_ppu_state_destroy(struct mv88e6xxx_chip *chip) @@ -555,6 +701,47 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6352; } +static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, + int link, int speed, int duplex, + phy_interface_t mode) +{ + int err; + + if (!chip->info->ops->port_set_link) + return 0; + + /* Port's MAC control must not be changed unless the link is down */ + err = chip->info->ops->port_set_link(chip, port, 0); + if (err) + return err; + + if (chip->info->ops->port_set_speed) { + err = chip->info->ops->port_set_speed(chip, port, speed); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + if (chip->info->ops->port_set_duplex) { + err = chip->info->ops->port_set_duplex(chip, port, duplex); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + if (chip->info->ops->port_set_rgmii_delay) { + err = chip->info->ops->port_set_rgmii_delay(chip, port, mode); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + err = 0; +restore_link: + if (chip->info->ops->port_set_link(chip, port, link)) + netdev_err(chip->ds->ports[port].netdev, + "failed to restore MAC's link\n"); + + return err; +} + /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. @@ -563,64 +750,18 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); - if (err) - goto out; - - reg &= ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); - - reg |= PORT_PCS_CTRL_FORCE_LINK; - if (phydev->link) - reg |= PORT_PCS_CTRL_LINK_UP; - - if (mv88e6xxx_6065_family(chip) && phydev->speed > SPEED_100) - goto out; - - switch (phydev->speed) { - case SPEED_1000: - reg |= PORT_PCS_CTRL_1000; - break; - case SPEED_100: - reg |= PORT_PCS_CTRL_100; - break; - case SPEED_10: - reg |= PORT_PCS_CTRL_10; - break; - default: - pr_info("Unknown speed"); - goto out; - } - - reg |= PORT_PCS_CTRL_FORCE_DUPLEX; - if (phydev->duplex == DUPLEX_FULL) - reg |= PORT_PCS_CTRL_DUPLEX_FULL; - - if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) && - (port >= mv88e6xxx_num_ports(chip) - 2)) { - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) - reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) - reg |= PORT_PCS_CTRL_RGMII_DELAY_TXCLK; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) - reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | - PORT_PCS_CTRL_RGMII_DELAY_TXCLK); - } - mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); - -out: + err = mv88e6xxx_port_setup_mac(chip, port, phydev->link, phydev->speed, + phydev->duplex, phydev->interface); mutex_unlock(&chip->reg_lock); + + if (err && err != -EOPNOTSUPP) + netdev_err(ds->ports[port].netdev, "failed to configure MAC\n"); } static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) @@ -1069,54 +1210,16 @@ static int _mv88e6xxx_atu_remove(struct mv88e6xxx_chip *chip, u16 fid, return _mv88e6xxx_atu_move(chip, fid, port, 0x0f, static_too); } -static const char * const mv88e6xxx_port_state_names[] = { - [PORT_CONTROL_STATE_DISABLED] = "Disabled", - [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening", - [PORT_CONTROL_STATE_LEARNING] = "Learning", - [PORT_CONTROL_STATE_FORWARDING] = "Forwarding", -}; - -static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, - u8 state) -{ - struct dsa_switch *ds = chip->ds; - u16 reg; - int err; - u8 oldstate; - - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); - if (err) - return err; - - oldstate = reg & PORT_CONTROL_STATE_MASK; - - reg &= ~PORT_CONTROL_STATE_MASK; - reg |= state; - - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - - return 0; -} - static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) { struct net_device *bridge = chip->ports[port].bridge_dev; - const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - u16 reg; - int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { - output_ports = mask; + output_ports = ~0; } else { for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { /* allow sending frames to every group member */ @@ -1132,14 +1235,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); - if (err) - return err; - - reg &= ~mask; - reg |= output_ports & mask; - - return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + return mv88e6xxx_port_set_vlan_map(chip, port, output_ports); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1167,13 +1263,11 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, } mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_state(chip, port, stp_state); + err = mv88e6xxx_port_set_state(chip, port, stp_state); mutex_unlock(&chip->reg_lock); if (err) - netdev_err(ds->ports[port].netdev, - "failed to update state to %s\n", - mv88e6xxx_port_state_names[stp_state]); + netdev_err(ds->ports[port].netdev, "failed to update state\n"); } static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) @@ -1189,49 +1283,6 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); } -static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, - u16 *new, u16 *old) -{ - struct dsa_switch *ds = chip->ds; - u16 pvid, reg; - int err; - - err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); - if (err) - return err; - - pvid = reg & PORT_DEFAULT_VLAN_MASK; - - if (new) { - reg &= ~PORT_DEFAULT_VLAN_MASK; - reg |= *new & PORT_DEFAULT_VLAN_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, - "DefaultVID %d (was %d)\n", *new, pvid); - } - - if (old) - *old = pvid; - - return 0; -} - -static int _mv88e6xxx_port_pvid_get(struct mv88e6xxx_chip *chip, - int port, u16 *pvid) -{ - return _mv88e6xxx_port_pvid(chip, port, NULL, pvid); -} - -static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, - int port, u16 pvid) -{ - return _mv88e6xxx_port_pvid(chip, port, &pvid, NULL); -} - static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); @@ -1411,7 +1462,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_pvid_get(chip, port, &pvid); + err = mv88e6xxx_port_get_pvid(chip, port, &pvid); if (err) goto unlock; @@ -1575,75 +1626,6 @@ loadpurge: return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE); } -static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, - u16 *new, u16 *old) -{ - struct dsa_switch *ds = chip->ds; - u16 upper_mask; - u16 fid; - u16 reg; - int err; - - if (mv88e6xxx_num_databases(chip) == 4096) - upper_mask = 0xff; - else if (mv88e6xxx_num_databases(chip) == 256) - upper_mask = 0xf; - else - return -EOPNOTSUPP; - - /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); - if (err) - return err; - - fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; - - if (new) { - reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; - reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); - if (err) - return err; - } - - /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); - if (err) - return err; - - fid |= (reg & upper_mask) << 4; - - if (new) { - reg &= ~upper_mask; - reg |= (*new >> 4) & upper_mask; - - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, - "FID %d (was %d)\n", *new, fid); - } - - if (old) - *old = fid; - - return 0; -} - -static int _mv88e6xxx_port_fid_get(struct mv88e6xxx_chip *chip, - int port, u16 *fid) -{ - return _mv88e6xxx_port_fid(chip, port, NULL, fid); -} - -static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip, - int port, u16 fid) -{ - return _mv88e6xxx_port_fid(chip, port, &fid, NULL); -} - static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) { DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); @@ -1654,7 +1636,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) /* Set every FID bit used by the (un)bridged ports */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { - err = _mv88e6xxx_port_fid_get(chip, i, fid); + err = mv88e6xxx_port_get_fid(chip, i, fid); if (err) return err; @@ -1819,48 +1801,19 @@ unlock: return err; } -static const char * const mv88e6xxx_port_8021q_mode_names[] = { - [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled", - [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback", - [PORT_CONTROL_2_8021Q_CHECK] = "Check", - [PORT_CONTROL_2_8021Q_SECURE] = "Secure", -}; - static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { struct mv88e6xxx_chip *chip = ds->priv; - u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : + u16 mode = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - u16 reg; int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); - if (err) - goto unlock; - - old = reg & PORT_CONTROL_2_8021Q_MASK; - - if (new != old) { - reg &= ~PORT_CONTROL_2_8021Q_MASK; - reg |= new & PORT_CONTROL_2_8021Q_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); - if (err) - goto unlock; - - netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", - mv88e6xxx_port_8021q_mode_names[new], - mv88e6xxx_port_8021q_mode_names[old]); - } - - err = 0; -unlock: + err = mv88e6xxx_port_set_8021q_mode(chip, port, mode); mutex_unlock(&chip->reg_lock); return err; @@ -1928,7 +1881,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, "failed to add VLAN %d%c\n", vid, untagged ? 'u' : 't'); - if (pvid && _mv88e6xxx_port_pvid_set(chip, port, vlan->vid_end)) + if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid_end)) netdev_err(ds->ports[port].netdev, "failed to set PVID %d\n", vlan->vid_end); @@ -1983,7 +1936,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_pvid_get(chip, port, &pvid); + err = mv88e6xxx_port_get_pvid(chip, port, &pvid); if (err) goto unlock; @@ -1993,7 +1946,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, goto unlock; if (vid == pvid) { - err = _mv88e6xxx_port_pvid_set(chip, port, 0); + err = mv88e6xxx_port_set_pvid(chip, port, 0); if (err) goto unlock; } @@ -2104,7 +2057,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, /* Null VLAN ID corresponds to the port private database */ if (vid == 0) - err = _mv88e6xxx_port_fid_get(chip, port, &vlan.fid); + err = mv88e6xxx_port_get_fid(chip, port, &vlan.fid); else err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false); if (err) @@ -2280,7 +2233,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, int err; /* Dump port's default Filtering Information Database (VLAN ID 0) */ - err = _mv88e6xxx_port_fid_get(chip, port, &fid); + err = mv88e6xxx_port_get_fid(chip, port, &fid); if (err) return err; @@ -2380,12 +2333,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) /* Set all ports to the disabled state. */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { - err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); - if (err) - return err; - - err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, - reg & 0xfffc); + err = mv88e6xxx_port_set_state(chip, i, + PORT_CONTROL_STATE_DISABLED); if (err) return err; } @@ -2455,35 +2404,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) int err; u16 reg; - if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) || - mv88e6xxx_6065_family(chip) || mv88e6xxx_6320_family(chip)) { - /* MAC Forcing register: don't force link, speed, - * duplex or flow control state to any particular - * values on physical ports, but force the CPU port - * and all DSA ports to their maximum bandwidth and - * full duplex. - */ - err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { - reg &= ~PORT_PCS_CTRL_UNFORCED; - reg |= PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX; - if (mv88e6xxx_6065_family(chip)) - reg |= PORT_PCS_CTRL_100; - else - reg |= PORT_PCS_CTRL_1000; - } else { - reg |= PORT_PCS_CTRL_UNFORCED; - } - - err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); - if (err) - return err; - } + /* MAC Forcing register: don't force link, speed, duplex or flow control + * state to any particular values on physical ports, but force the CPU + * port and all DSA ports to their maximum bandwidth and full duplex. + */ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP, + SPEED_MAX, DUPLEX_FULL, + PHY_INTERFACE_MODE_NA); + else + err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED, + SPEED_UNFORCED, DUPLEX_UNFORCED, + PHY_INTERFACE_MODE_NA); + if (err) + return err; /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, * disable Header mode, enable IGMP/MLD snooping, disable VLAN @@ -2687,7 +2621,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - err = _mv88e6xxx_port_fid_set(chip, port, 0); + err = mv88e6xxx_port_set_fid(chip, port, 0); if (err) return err; @@ -2701,7 +2635,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } -int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) { int err; @@ -2770,7 +2704,11 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) /* Enable the PHY Polling Unit if present, don't discard any packets, * and mask all interrupt sources. */ - reg = 0; + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, ®); + if (err < 0) + return err; + + reg &= ~GLOBAL_CONTROL_PPU_ENABLE; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU) || mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE)) reg |= GLOBAL_CONTROL_PPU_ENABLE; @@ -2875,10 +2813,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) mutex_lock(&chip->reg_lock); - err = mv88e6xxx_switch_reset(chip); - if (err) - goto unlock; - /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_setup_port(chip, i); @@ -3206,42 +3140,64 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6095_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6123_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6131_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6161_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6165_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6171_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6172_ops = { @@ -3250,12 +3206,20 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6175_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -3264,12 +3228,19 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6185_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6240_ops = { @@ -3278,6 +3249,10 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -3286,6 +3261,9 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -3294,18 +3272,29 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6350_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6351_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6352_ops = { @@ -3314,6 +3303,10 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_info mv88e6xxx_table[] = { @@ -3326,6 +3319,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 8, .flags = MV88E6XXX_FLAGS_FAMILY_6097, .ops = &mv88e6085_ops, }, @@ -3339,6 +3333,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 8, .flags = MV88E6XXX_FLAGS_FAMILY_6095, .ops = &mv88e6095_ops, }, @@ -3352,6 +3347,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6123_ops, }, @@ -3365,6 +3361,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6131_ops, }, @@ -3378,6 +3375,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6161_ops, }, @@ -3391,6 +3389,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6165_ops, }, @@ -3404,6 +3403,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6171_ops, }, @@ -3417,6 +3417,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6172_ops, }, @@ -3430,6 +3431,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6175_ops, }, @@ -3443,6 +3445,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6176_ops, }, @@ -3456,6 +3459,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 8, .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6185_ops, }, @@ -3469,6 +3473,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6240_ops, }, @@ -3482,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 8, .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6320_ops, }, @@ -3495,6 +3501,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 8, .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6321_ops, }, @@ -3508,6 +3515,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6350_ops, }, @@ -3521,6 +3529,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6351_ops, }, @@ -3534,6 +3543,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .port_base_addr = 0x10, .global1_addr = 0x1b, .age_time_coeff = 15000, + .g1_irqs = 9, .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6352_ops, }, @@ -3667,6 +3677,12 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, if (err) goto free; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_switch_reset(chip); + mutex_unlock(&chip->reg_lock); + if (err) + goto free; + mv88e6xxx_phy_init(chip); err = mv88e6xxx_mdio_register(chip, NULL); @@ -3837,17 +3853,56 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) !of_property_read_u32(np, "eeprom-length", &eeprom_len)) chip->eeprom_len = eeprom_len; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_switch_reset(chip); + mutex_unlock(&chip->reg_lock); + if (err) + goto out; + + chip->irq = of_irq_get(np, 0); + if (chip->irq == -EPROBE_DEFER) { + err = chip->irq; + goto out; + } + + if (chip->irq > 0) { + /* Has to be performed before the MDIO bus is created, + * because the PHYs will link there interrupts to these + * interrupt controllers + */ + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_irq_setup(chip); + mutex_unlock(&chip->reg_lock); + + if (err) + goto out; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) { + err = mv88e6xxx_g2_irq_setup(chip); + if (err) + goto out_g1_irq; + } + } + err = mv88e6xxx_mdio_register(chip, np); if (err) - return err; + goto out_g2_irq; err = mv88e6xxx_register_switch(chip, np); - if (err) { - mv88e6xxx_mdio_unregister(chip); - return err; - } + if (err) + goto out_mdio; return 0; + +out_mdio: + mv88e6xxx_mdio_unregister(chip); +out_g2_irq: + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) + mv88e6xxx_g2_irq_free(chip); +out_g1_irq: + mv88e6xxx_g1_irq_free(chip); +out: + return err; } static void mv88e6xxx_remove(struct mdio_device *mdiodev) @@ -3858,6 +3913,10 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_mdio_unregister(chip); + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) + mv88e6xxx_g2_irq_free(chip); + mv88e6xxx_g1_irq_free(chip); } static const struct of_device_id mv88e6xxx_of_match[] = { diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index cf686e7506a9..1a0b13521d13 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1,5 +1,6 @@ /* - * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * Marvell 88E6xxx Switch Global 2 Registers support (device address + * 0x1C) * * Copyright (c) 2008 Marvell Semiconductor * @@ -11,6 +12,7 @@ * (at your option) any later version. */ +#include <linux/irqdomain.h> #include "mv88e6xxx.h" #include "global2.h" @@ -417,6 +419,141 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); } +static void mv88e6xxx_g2_irq_mask(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int n = d->hwirq; + + chip->g2_irq.masked |= (1 << n); +} + +static void mv88e6xxx_g2_irq_unmask(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int n = d->hwirq; + + chip->g2_irq.masked &= ~(1 << n); +} + +static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id) +{ + struct mv88e6xxx_chip *chip = dev_id; + unsigned int nhandled = 0; + unsigned int sub_irq; + unsigned int n; + int err; + u16 reg; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g2_read(chip, GLOBAL2_INT_SOURCE, ®); + mutex_unlock(&chip->reg_lock); + if (err) + goto out; + + for (n = 0; n < 16; ++n) { + if (reg & (1 << n)) { + sub_irq = irq_find_mapping(chip->g2_irq.domain, n); + handle_nested_irq(sub_irq); + ++nhandled; + } + } +out: + return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); +} + +static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + + mutex_lock(&chip->reg_lock); +} + +static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d) +{ + struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + + mv88e6xxx_g2_write(chip, GLOBAL2_INT_MASK, ~chip->g2_irq.masked); + + mutex_unlock(&chip->reg_lock); +} + +static struct irq_chip mv88e6xxx_g2_irq_chip = { + .name = "mv88e6xxx-g2", + .irq_mask = mv88e6xxx_g2_irq_mask, + .irq_unmask = mv88e6xxx_g2_irq_unmask, + .irq_bus_lock = mv88e6xxx_g2_irq_bus_lock, + .irq_bus_sync_unlock = mv88e6xxx_g2_irq_bus_sync_unlock, +}; + +static int mv88e6xxx_g2_irq_domain_map(struct irq_domain *d, + unsigned int irq, + irq_hw_number_t hwirq) +{ + struct mv88e6xxx_chip *chip = d->host_data; + + irq_set_chip_data(irq, d->host_data); + irq_set_chip_and_handler(irq, &chip->g2_irq.chip, handle_level_irq); + irq_set_noprobe(irq); + + return 0; +} + +static const struct irq_domain_ops mv88e6xxx_g2_irq_domain_ops = { + .map = mv88e6xxx_g2_irq_domain_map, + .xlate = irq_domain_xlate_twocell, +}; + +void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) +{ + int irq, virq; + + for (irq = 0; irq < 16; irq++) { + virq = irq_find_mapping(chip->g2_irq.domain, irq); + irq_dispose_mapping(virq); + } + + irq_domain_remove(chip->g2_irq.domain); +} + +int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) +{ + int device_irq; + int err, irq; + + if (!chip->dev->of_node) + return -EINVAL; + + chip->g2_irq.domain = irq_domain_add_simple( + chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip); + if (!chip->g2_irq.domain) + return -ENOMEM; + + for (irq = 0; irq < 16; irq++) + irq_create_mapping(chip->g2_irq.domain, irq); + + chip->g2_irq.chip = mv88e6xxx_g2_irq_chip; + chip->g2_irq.masked = ~0; + + device_irq = irq_find_mapping(chip->g1_irq.domain, + GLOBAL_STATUS_IRQ_DEVICE); + if (device_irq < 0) { + err = device_irq; + goto out; + } + + err = devm_request_threaded_irq(chip->dev, device_irq, NULL, + mv88e6xxx_g2_irq_thread_fn, + IRQF_ONESHOT, "mv88e6xxx-g1", chip); + if (err) + goto out; + + return 0; +out: + mv88e6xxx_g2_irq_free(chip); + + return err; +} + int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) { u16 reg; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index c4bb9035ee3a..1eb3ddd21551 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -33,6 +33,8 @@ int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, struct ethtool_eeprom *eeprom, u8 *data); int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); +int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip); +void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip); #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ @@ -83,6 +85,15 @@ static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) return -EOPNOTSUPP; } +static inline int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) +{ +} + #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ #endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e572121c196e..929613021eff 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -13,6 +13,7 @@ #define __MV88E6XXX_H #include <linux/if_vlan.h> +#include <linux/irq.h> #include <linux/gpio/consumer.h> #ifndef UINT64_MAX @@ -60,16 +61,22 @@ #define PORT_PCS_CTRL 0x01 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK BIT(15) #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK BIT(14) +#define PORT_PCS_CTRL_FORCE_SPEED BIT(13) /* 6390 */ +#define PORT_PCS_CTRL_ALTSPEED BIT(12) /* 6390 */ +#define PORT_PCS_CTRL_200BASE BIT(12) /* 6352 */ #define PORT_PCS_CTRL_FC BIT(7) #define PORT_PCS_CTRL_FORCE_FC BIT(6) #define PORT_PCS_CTRL_LINK_UP BIT(5) #define PORT_PCS_CTRL_FORCE_LINK BIT(4) #define PORT_PCS_CTRL_DUPLEX_FULL BIT(3) #define PORT_PCS_CTRL_FORCE_DUPLEX BIT(2) -#define PORT_PCS_CTRL_10 0x00 -#define PORT_PCS_CTRL_100 0x01 -#define PORT_PCS_CTRL_1000 0x02 -#define PORT_PCS_CTRL_UNFORCED 0x03 +#define PORT_PCS_CTRL_SPEED_MASK (0x03) +#define PORT_PCS_CTRL_SPEED_10 (0x00) +#define PORT_PCS_CTRL_SPEED_100 (0x01) +#define PORT_PCS_CTRL_SPEED_200 (0x02) /* 6065 and non Gb chips */ +#define PORT_PCS_CTRL_SPEED_1000 (0x02) +#define PORT_PCS_CTRL_SPEED_10000 (0x03) /* 6390X */ +#define PORT_PCS_CTRL_SPEED_UNFORCED (0x03) #define PORT_PAUSE_CTRL 0x02 #define PORT_SWITCH_ID 0x03 #define PORT_SWITCH_ID_PROD_NUM_6085 0x04a @@ -167,6 +174,15 @@ #define GLOBAL_STATUS_PPU_INITIALIZING (0x1 << 14) #define GLOBAL_STATUS_PPU_DISABLED (0x2 << 14) #define GLOBAL_STATUS_PPU_POLLING (0x3 << 14) +#define GLOBAL_STATUS_IRQ_AVB 8 +#define GLOBAL_STATUS_IRQ_DEVICE 7 +#define GLOBAL_STATUS_IRQ_STATS 6 +#define GLOBAL_STATUS_IRQ_VTU_PROBLEM 5 +#define GLOBAL_STATUS_IRQ_VTU_DONE 4 +#define GLOBAL_STATUS_IRQ_ATU_PROBLEM 3 +#define GLOBAL_STATUS_IRQ_ATU_DONE 2 +#define GLOBAL_STATUS_IRQ_TCAM_DONE 1 +#define GLOBAL_STATUS_IRQ_EEPROM_DONE 0 #define GLOBAL_MAC_01 0x01 #define GLOBAL_MAC_23 0x02 #define GLOBAL_MAC_45 0x03 @@ -417,6 +433,7 @@ enum mv88e6xxx_cap { * The device contains a second set of global 16-bit registers. */ MV88E6XXX_CAP_GLOBAL2, + MV88E6XXX_CAP_G2_INT, /* (0x00) Interrupt Status */ MV88E6XXX_CAP_G2_MGMT_EN_2X, /* (0x02) MGMT Enable Register 2x */ MV88E6XXX_CAP_G2_MGMT_EN_0X, /* (0x03) MGMT Enable Register 0x */ MV88E6XXX_CAP_G2_IRL_CMD, /* (0x09) Ingress Rate Command */ @@ -464,6 +481,7 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) #define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_INT BIT_ULL(MV88E6XXX_CAP_G2_INT) #define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) #define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) #define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) @@ -524,6 +542,7 @@ enum mv88e6xxx_cap { (MV88E6XXX_FLAG_G1_ATU_FID | \ MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_INT | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ @@ -536,6 +555,7 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6185 \ (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_INT | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ @@ -561,6 +581,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G1_ATU_FID | \ MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_INT | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ @@ -578,6 +599,7 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G1_ATU_FID | \ MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_INT | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ @@ -602,6 +624,7 @@ struct mv88e6xxx_info { unsigned int port_base_addr; unsigned int global1_addr; unsigned int age_time_coeff; + unsigned int g1_irqs; unsigned long long flags; const struct mv88e6xxx_ops *ops; }; @@ -628,6 +651,13 @@ struct mv88e6xxx_priv_port { struct net_device *bridge_dev; }; +struct mv88e6xxx_irq { + u16 masked; + struct irq_chip chip; + struct irq_domain *domain; + unsigned int nirqs; +}; + struct mv88e6xxx_chip { const struct mv88e6xxx_info *info; @@ -677,6 +707,13 @@ struct mv88e6xxx_chip { /* And the MDIO bus itself */ struct mii_bus *mdio_bus; + + /* There can be two interrupt controllers, which are chained + * off a GPIO as interrupt source + */ + struct mv88e6xxx_irq g1_irq; + struct mv88e6xxx_irq g2_irq; + int irq; }; struct mv88e6xxx_bus_ops { @@ -696,6 +733,41 @@ struct mv88e6xxx_ops { u16 *val); int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); + + /* RGMII Receive/Transmit Timing Control + * Add delay on PHY_INTERFACE_MODE_RGMII_*ID, no delay otherwise. + */ + int (*port_set_rgmii_delay)(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); + +#define LINK_FORCED_DOWN 0 +#define LINK_FORCED_UP 1 +#define LINK_UNFORCED -2 + + /* Port's MAC link state + * Use LINK_FORCED_UP or LINK_FORCED_DOWN to force link up or down, + * or LINK_UNFORCED for normal link detection. + */ + int (*port_set_link)(struct mv88e6xxx_chip *chip, int port, int link); + +#define DUPLEX_UNFORCED -2 + + /* Port's MAC duplex mode + * + * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex, + * or DUPLEX_UNFORCED for normal duplex detection. + */ + int (*port_set_duplex)(struct mv88e6xxx_chip *chip, int port, int dup); + +#define SPEED_MAX INT_MAX +#define SPEED_UNFORCED -2 + + /* Port's MAC speed (in Mbps) + * + * Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid. + * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value. + */ + int (*port_set_speed)(struct mv88e6xxx_chip *chip, int port, int speed); }; enum stat_type { diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c new file mode 100644 index 000000000000..e4978f6367aa --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -0,0 +1,498 @@ +/* + * Marvell 88E6xxx Switch Port Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "port.h" + +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +/* Offset 0x01: MAC (or PCS or Physical) Control Register + * + * Link, Duplex and Flow Control have one force bit, one value bit. + * + * For port's MAC speed, ForceSpd (or SpdValue) bits 1:0 program the value. + * Alternative values require the 200BASE (or AltSpeed) bit 12 set. + * Newer chips need a ForcedSpd bit 13 set to consider the value. + */ + +static int mv88e6xxx_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_RGMII_DELAY_RXCLK | + PORT_PCS_CTRL_RGMII_DELAY_TXCLK); + + switch (mode) { + case PHY_INTERFACE_MODE_RGMII_RXID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_TXCLK; + break; + case PHY_INTERFACE_MODE_RGMII_ID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK | + PORT_PCS_CTRL_RGMII_DELAY_TXCLK; + break; + case PHY_INTERFACE_MODE_RGMII: + break; + default: + return 0; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "delay RXCLK %s, TXCLK %s\n", + reg & PORT_PCS_CTRL_RGMII_DELAY_RXCLK ? "yes" : "no", + reg & PORT_PCS_CTRL_RGMII_DELAY_TXCLK ? "yes" : "no"); + + return 0; +} + +int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + if (port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_rgmii_delay(chip, port, mode); +} + +int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + if (port != 0) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_rgmii_delay(chip, port, mode); +} + +int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP); + + switch (link) { + case LINK_FORCED_DOWN: + reg |= PORT_PCS_CTRL_FORCE_LINK; + break; + case LINK_FORCED_UP: + reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP; + break; + case LINK_UNFORCED: + /* normal link detection */ + break; + default: + return -EINVAL; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "%s link %s\n", + reg & PORT_PCS_CTRL_FORCE_LINK ? "Force" : "Unforce", + reg & PORT_PCS_CTRL_LINK_UP ? "up" : "down"); + + return 0; +} + +int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL); + + switch (dup) { + case DUPLEX_HALF: + reg |= PORT_PCS_CTRL_FORCE_DUPLEX; + break; + case DUPLEX_FULL: + reg |= PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL; + break; + case DUPLEX_UNFORCED: + /* normal duplex detection */ + break; + default: + return -EINVAL; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "%s %s duplex\n", + reg & PORT_PCS_CTRL_FORCE_DUPLEX ? "Force" : "Unforce", + reg & PORT_PCS_CTRL_DUPLEX_FULL ? "full" : "half"); + + return 0; +} + +static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, + int speed, bool alt_bit, bool force_bit) +{ + u16 reg, ctrl; + int err; + + switch (speed) { + case 10: + ctrl = PORT_PCS_CTRL_SPEED_10; + break; + case 100: + ctrl = PORT_PCS_CTRL_SPEED_100; + break; + case 200: + if (alt_bit) + ctrl = PORT_PCS_CTRL_SPEED_100 | PORT_PCS_CTRL_ALTSPEED; + else + ctrl = PORT_PCS_CTRL_SPEED_200; + break; + case 1000: + ctrl = PORT_PCS_CTRL_SPEED_1000; + break; + case 2500: + ctrl = PORT_PCS_CTRL_SPEED_1000 | PORT_PCS_CTRL_ALTSPEED; + break; + case 10000: + /* all bits set, fall through... */ + case SPEED_UNFORCED: + ctrl = PORT_PCS_CTRL_SPEED_UNFORCED; + break; + default: + return -EOPNOTSUPP; + } + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~PORT_PCS_CTRL_SPEED_MASK; + if (alt_bit) + reg &= ~PORT_PCS_CTRL_ALTSPEED; + if (force_bit) { + reg &= ~PORT_PCS_CTRL_FORCE_SPEED; + if (speed) + ctrl |= PORT_PCS_CTRL_FORCE_SPEED; + } + reg |= ctrl; + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + if (speed) + netdev_dbg(chip->ds->ports[port].netdev, + "Speed set to %d Mbps\n", speed); + else + netdev_dbg(chip->ds->ports[port].netdev, "Speed unforced\n"); + + return 0; +} + +/* Support 10, 100, 200 Mbps (e.g. 88E6065 family) */ +int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 200; + + if (speed > 200) + return -EOPNOTSUPP; + + /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */ + return mv88e6xxx_port_set_speed(chip, port, speed, false, false); +} + +/* Support 10, 100, 1000 Mbps (e.g. 88E6185 family) */ +int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 1000; + + if (speed == 200 || speed > 1000) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, false, false); +} + +/* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */ +int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 1000; + + if (speed > 1000) + return -EOPNOTSUPP; + + if (speed == 200 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, false); +} + +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6390) */ +int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 9 ? 1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 9) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, true); +} + +/* Support 10, 100, 200, 1000, 2500, 10000 Mbps (e.g. 88E6190X) */ +int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 9 ? 1000 : 10000; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed >= 2500 && port < 9) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, true); +} + +/* Offset 0x04: Port Control Register */ + +static const char * const mv88e6xxx_port_state_names[] = { + [PORT_CONTROL_STATE_DISABLED] = "Disabled", + [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening", + [PORT_CONTROL_STATE_LEARNING] = "Learning", + [PORT_CONTROL_STATE_FORWARDING] = "Forwarding", +}; + +int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); + if (err) + return err; + + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "PortState set to %s\n", + mv88e6xxx_port_state_names[state]); + + return 0; +} + +/* Offset 0x05: Port Control 1 */ + +/* Offset 0x06: Port Based VLAN Map */ + +int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map) +{ + const u16 mask = GENMASK(mv88e6xxx_num_ports(chip) - 1, 0); + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + reg &= ~mask; + reg |= map & mask; + + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "VLANTable set to %.3x\n", + map); + + return 0; +} + +int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid) +{ + const u16 upper_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4; + u16 reg; + int err; + + /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */ + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + *fid = (reg & 0xf000) >> 12; + + /* Port's default FID upper bits are located in reg 0x05, offset 0 */ + if (upper_mask) { + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; + + *fid |= (reg & upper_mask) << 4; + } + + return 0; +} + +int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid) +{ + const u16 upper_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4; + u16 reg; + int err; + + if (fid >= mv88e6xxx_num_databases(chip)) + return -EINVAL; + + /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */ + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + reg &= 0x0fff; + reg |= (fid & 0x000f) << 12; + + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; + + /* Port's default FID upper bits are located in reg 0x05, offset 0 */ + if (upper_mask) { + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; + + reg &= ~upper_mask; + reg |= (fid >> 4) & upper_mask; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; + } + + netdev_dbg(chip->ds->ports[port].netdev, "FID set to %u\n", fid); + + return 0; +} + +/* Offset 0x07: Default Port VLAN ID & Priority */ + +int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; + + *pvid = reg & PORT_DEFAULT_VLAN_MASK; + + return 0; +} + +int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; + + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= pvid & PORT_DEFAULT_VLAN_MASK; + + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "DefaultVID set to %u\n", + pvid); + + return 0; +} + +/* Offset 0x08: Port Control 2 Register */ + +static const char * const mv88e6xxx_port_8021q_mode_names[] = { + [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled", + [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback", + [PORT_CONTROL_2_8021Q_CHECK] = "Check", + [PORT_CONTROL_2_8021Q_SECURE] = "Secure", +}; + +int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, + u16 mode) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) + return err; + + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= mode & PORT_CONTROL_2_8021Q_MASK; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "802.1QMode set to %s\n", + mv88e6xxx_port_8021q_mode_names[mode]); + + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h new file mode 100644 index 000000000000..499129c1489c --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -0,0 +1,52 @@ +/* + * Marvell 88E6xxx Switch Port Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_PORT_H +#define _MV88E6XXX_PORT_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val); +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val); + +int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); +int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); + +int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link); + +int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); + +int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); + +int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state); + +int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map); + +int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid); +int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid); + +int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid); +int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid); + +int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, + u16 mode); + +#endif /* _MV88E6XXX_PORT_H */ diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 91ada52f776b..a7533780dddc 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -508,7 +508,6 @@ static const struct net_device_ops netdev_ops = { .ndo_get_stats = el3_get_stats, .ndo_set_rx_mode = set_multicast_list, .ndo_tx_timeout = el3_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1041,67 +1040,68 @@ el3_link_ok(struct net_device *dev) } static int -el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) +el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) { u16 tmp; int ioaddr = dev->base_addr; + u32 supported; EL3WINDOW(0); /* obtain current transceiver via WN4_MEDIA? */ tmp = inw(ioaddr + WN0_ADDR_CONF); - ecmd->transceiver = XCVR_INTERNAL; switch (tmp >> 14) { case 0: - ecmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case 1: - ecmd->port = PORT_AUI; - ecmd->transceiver = XCVR_EXTERNAL; + cmd->base.port = PORT_AUI; break; case 3: - ecmd->port = PORT_BNC; + cmd->base.port = PORT_BNC; default: break; } - ecmd->duplex = DUPLEX_HALF; - ecmd->supported = 0; + cmd->base.duplex = DUPLEX_HALF; + supported = 0; tmp = inw(ioaddr + WN0_CONF_CTRL); if (tmp & (1<<13)) - ecmd->supported |= SUPPORTED_AUI; + supported |= SUPPORTED_AUI; if (tmp & (1<<12)) - ecmd->supported |= SUPPORTED_BNC; + supported |= SUPPORTED_BNC; if (tmp & (1<<9)) { - ecmd->supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half | + supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full; /* hmm... */ EL3WINDOW(4); tmp = inw(ioaddr + WN4_NETDIAG); if (tmp & FD_ENABLE) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; } - ethtool_cmd_speed_set(ecmd, SPEED_10); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + cmd->base.speed = SPEED_10; EL3WINDOW(1); return 0; } static int -el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) +el3_netdev_set_ecmd(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { u16 tmp; int ioaddr = dev->base_addr; - if (ecmd->speed != SPEED_10) + if (cmd->base.speed != SPEED_10) return -EINVAL; - if ((ecmd->duplex != DUPLEX_HALF) && (ecmd->duplex != DUPLEX_FULL)) - return -EINVAL; - if ((ecmd->transceiver != XCVR_INTERNAL) && (ecmd->transceiver != XCVR_EXTERNAL)) + if ((cmd->base.duplex != DUPLEX_HALF) && + (cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; /* change XCVR type */ EL3WINDOW(0); tmp = inw(ioaddr + WN0_ADDR_CONF); - switch (ecmd->port) { + switch (cmd->base.port) { case PORT_TP: tmp &= ~(3<<14); dev->if_port = 0; @@ -1131,7 +1131,7 @@ el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) EL3WINDOW(4); tmp = inw(ioaddr + WN4_NETDIAG); - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) tmp |= FD_ENABLE; else tmp &= ~FD_ENABLE; @@ -1147,24 +1147,26 @@ static void el3_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->version, DRV_VERSION, sizeof(info->version)); } -static int el3_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int el3_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct el3_private *lp = netdev_priv(dev); int ret; spin_lock_irq(&lp->lock); - ret = el3_netdev_get_ecmd(dev, ecmd); + ret = el3_netdev_get_ecmd(dev, cmd); spin_unlock_irq(&lp->lock); return ret; } -static int el3_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int el3_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct el3_private *lp = netdev_priv(dev); int ret; spin_lock_irq(&lp->lock); - ret = el3_netdev_set_ecmd(dev, ecmd); + ret = el3_netdev_set_ecmd(dev, cmd); spin_unlock_irq(&lp->lock); return ret; } @@ -1192,11 +1194,11 @@ static void el3_set_msglevel(struct net_device *dev, u32 v) static const struct ethtool_ops ethtool_ops = { .get_drvinfo = el3_get_drvinfo, - .get_settings = el3_get_settings, - .set_settings = el3_set_settings, .get_link = el3_get_link, .get_msglevel = el3_get_msglevel, .set_msglevel = el3_set_msglevel, + .get_link_ksettings = el3_get_link_ksettings, + .set_link_ksettings = el3_set_link_ksettings, }; static void diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b26e038b4a0e..b9f4c463e516 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -570,7 +570,6 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = corkscrew_timeout, .ndo_get_stats = corkscrew_get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index b88afd759307..9359a37fedc0 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -254,7 +254,6 @@ static const struct net_device_ops el3_netdev_ops = { .ndo_get_stats = el3_get_stats, .ndo_do_ioctl = el3_ioctl, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 71396e4b87e3..e28254a00599 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -188,7 +188,6 @@ static const struct net_device_ops el3_netdev_ops = { .ndo_set_config = el3_config, .ndo_get_stats = el3_get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 9133e7926da5..b3560a364e53 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1062,7 +1062,6 @@ static const struct net_device_ops boomrang_netdev_ops = { .ndo_do_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1080,7 +1079,6 @@ static const struct net_device_ops vortex_netdev_ops = { .ndo_do_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2909,18 +2907,20 @@ static int vortex_nway_reset(struct net_device *dev) return mii_nway_restart(&vp->mii); } -static int vortex_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int vortex_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct vortex_private *vp = netdev_priv(dev); - return mii_ethtool_gset(&vp->mii, cmd); + return mii_ethtool_get_link_ksettings(&vp->mii, cmd); } -static int vortex_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int vortex_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct vortex_private *vp = netdev_priv(dev); - return mii_ethtool_sset(&vp->mii, cmd); + return mii_ethtool_set_link_ksettings(&vp->mii, cmd); } static u32 vortex_get_msglevel(struct net_device *dev) @@ -3033,13 +3033,13 @@ static const struct ethtool_ops vortex_ethtool_ops = { .set_msglevel = vortex_set_msglevel, .get_ethtool_stats = vortex_get_ethtool_stats, .get_sset_count = vortex_get_sset_count, - .get_settings = vortex_get_settings, - .set_settings = vortex_set_settings, .get_link = ethtool_op_get_link, .nway_reset = vortex_nway_reset, .get_wol = vortex_get_wol, .set_wol = vortex_set_wol, .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = vortex_get_link_ksettings, + .set_link_ksettings = vortex_set_link_ksettings, }; #ifdef CONFIG_PCI diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 8f8418d2ac4a..a0cacbe846ba 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -996,28 +996,30 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) } static int -typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +typhoon_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct typhoon *tp = netdev_priv(dev); + u32 supported, advertising = 0; - cmd->supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg; switch (tp->xcvr_select) { case TYPHOON_XCVR_10HALF: - cmd->advertising = ADVERTISED_10baseT_Half; + advertising = ADVERTISED_10baseT_Half; break; case TYPHOON_XCVR_10FULL: - cmd->advertising = ADVERTISED_10baseT_Full; + advertising = ADVERTISED_10baseT_Full; break; case TYPHOON_XCVR_100HALF: - cmd->advertising = ADVERTISED_100baseT_Half; + advertising = ADVERTISED_100baseT_Half; break; case TYPHOON_XCVR_100FULL: - cmd->advertising = ADVERTISED_100baseT_Full; + advertising = ADVERTISED_100baseT_Full; break; case TYPHOON_XCVR_AUTONEG: - cmd->advertising = ADVERTISED_10baseT_Half | + advertising = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | @@ -1026,54 +1028,57 @@ typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) } if(tp->capabilities & TYPHOON_FIBER) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_FIBRE; - cmd->port = PORT_FIBRE; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_FIBRE; + cmd->base.port = PORT_FIBRE; } else { - cmd->supported |= SUPPORTED_10baseT_Half | + supported |= SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP; - cmd->advertising |= ADVERTISED_TP; - cmd->port = PORT_TP; + advertising |= ADVERTISED_TP; + cmd->base.port = PORT_TP; } /* need to get stats to make these link speed/duplex valid */ typhoon_do_get_stats(tp); - ethtool_cmd_speed_set(cmd, tp->speed); - cmd->duplex = tp->duplex; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; + cmd->base.speed = tp->speed; + cmd->base.duplex = tp->duplex; + cmd->base.phy_address = 0; if(tp->xcvr_select == TYPHOON_XCVR_AUTONEG) - cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; + cmd->base.autoneg = AUTONEG_DISABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } static int -typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +typhoon_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct typhoon *tp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; struct cmd_desc xp_cmd; __le16 xcvr; int err; err = -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { xcvr = TYPHOON_XCVR_AUTONEG; } else { - if (cmd->duplex == DUPLEX_HALF) { + if (cmd->base.duplex == DUPLEX_HALF) { if (speed == SPEED_10) xcvr = TYPHOON_XCVR_10HALF; else if (speed == SPEED_100) xcvr = TYPHOON_XCVR_100HALF; else goto out; - } else if (cmd->duplex == DUPLEX_FULL) { + } else if (cmd->base.duplex == DUPLEX_FULL) { if (speed == SPEED_10) xcvr = TYPHOON_XCVR_10FULL; else if (speed == SPEED_100) @@ -1091,12 +1096,12 @@ typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) goto out; tp->xcvr_select = xcvr; - if(cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { tp->speed = 0xff; /* invalid */ tp->duplex = 0xff; /* invalid */ } else { tp->speed = speed; - tp->duplex = cmd->duplex; + tp->duplex = cmd->base.duplex; } out: @@ -1145,13 +1150,13 @@ typhoon_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) } static const struct ethtool_ops typhoon_ethtool_ops = { - .get_settings = typhoon_get_settings, - .set_settings = typhoon_set_settings, .get_drvinfo = typhoon_get_drvinfo, .get_wol = typhoon_get_wol, .set_wol = typhoon_set_wol, .get_link = ethtool_op_get_link, .get_ringparam = typhoon_get_ringparam, + .get_link_ksettings = typhoon_get_link_ksettings, + .set_link_ksettings = typhoon_set_link_ksettings, }; static int @@ -2255,7 +2260,6 @@ static const struct net_device_ops typhoon_netdev_ops = { .ndo_get_stats = typhoon_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, }; static int diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 5db1f55abef4..a43544af257b 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -64,7 +64,6 @@ const struct net_device_ops ei_netdev_ops = { .ndo_set_rx_mode = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ei_poll, #endif diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index e8fc2e87e840..46d2257c4430 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -69,7 +69,6 @@ const struct net_device_ops eip_netdev_ops = { .ndo_set_rx_mode = eip_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = eip_poll, #endif diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 39ca9350d1b2..b0a3b85fc6f8 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -536,7 +536,6 @@ static const struct net_device_ops ax_netdev_ops = { .ndo_set_rx_mode = ax_ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ax_ei_poll, #endif diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 4ea717d68c95..1d84a0544ace 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -134,7 +134,6 @@ static const struct net_device_ops axnet_netdev_ops = { .ndo_tx_timeout = axnet_tx_timeout, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c index d686b9cac29f..11cbf22ad201 100644 --- a/drivers/net/ethernet/8390/etherh.c +++ b/drivers/net/ethernet/8390/etherh.c @@ -654,7 +654,6 @@ static const struct net_device_ops etherh_netdev_ops = { .ndo_set_rx_mode = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = __ei_poll, #endif diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c index 0fe19d609c2e..8ae249195301 100644 --- a/drivers/net/ethernet/8390/hydra.c +++ b/drivers/net/ethernet/8390/hydra.c @@ -105,7 +105,6 @@ static const struct net_device_ops hydra_netdev_ops = { .ndo_set_rx_mode = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = __ei_poll, #endif diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index b9283901136e..9497f18eaba0 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -483,7 +483,6 @@ static const struct net_device_ops mac8390_netdev_ops = { .ndo_set_rx_mode = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = __ei_poll, #endif diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index e1c055574a11..4bb967bc879e 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -308,7 +308,6 @@ static const struct net_device_ops mcf8390_netdev_ops = { .ndo_set_rx_mode = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = __ei_poll, #endif diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c index 57e97910c728..07355302443d 100644 --- a/drivers/net/ethernet/8390/ne2k-pci.c +++ b/drivers/net/ethernet/8390/ne2k-pci.c @@ -209,7 +209,6 @@ static const struct net_device_ops ne2k_netdev_ops = { .ndo_set_rx_mode = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ei_poll, #endif diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 2f79d29f17f2..63079a6e20d9 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -227,7 +227,6 @@ static const struct net_device_ops pcnet_netdev_ops = { .ndo_do_ioctl = ei_ioctl, .ndo_set_rx_mode = ei_set_multicast_list, .ndo_tx_timeout = ei_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 139385dcdaa7..364b6514f65f 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -195,7 +195,6 @@ static const struct net_device_ops ultra_netdev_ops = { .ndo_set_rx_mode = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ultra_poll, #endif diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index dd7d816bde52..ad019cbc698f 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -156,7 +156,6 @@ static const struct net_device_ops wd_netdev_ops = { .ndo_set_rx_mode = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ei_poll, #endif diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index 8308728fad05..6d93956b293b 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -284,7 +284,6 @@ static const struct net_device_ops zorro8390_netdev_ops = { .ndo_set_rx_mode = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = __ei_poll, #endif diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 8af2c88d5b33..3aaad33cdbc6 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -634,7 +634,6 @@ static const struct net_device_ops netdev_ops = { .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, .ndo_do_ioctl = netdev_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef VLAN_SUPPORT @@ -1817,21 +1816,23 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) strlcpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); } -static int get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct netdev_private *np = netdev_priv(dev); spin_lock_irq(&np->lock); - mii_ethtool_gset(&np->mii_if, ecmd); + mii_ethtool_get_link_ksettings(&np->mii_if, cmd); spin_unlock_irq(&np->lock); return 0; } -static int set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct netdev_private *np = netdev_priv(dev); int res; spin_lock_irq(&np->lock); - res = mii_ethtool_sset(&np->mii_if, ecmd); + res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd); spin_unlock_irq(&np->lock); check_duplex(dev); return res; @@ -1862,12 +1863,12 @@ static void set_msglevel(struct net_device *dev, u32 val) static const struct ethtool_ops ethtool_ops = { .begin = check_if_running, .get_drvinfo = get_drvinfo, - .get_settings = get_settings, - .set_settings = set_settings, .nway_reset = nway_reset, .get_link = get_link, .get_msglevel = get_msglevel, .set_msglevel = set_msglevel, + .get_link_ksettings = get_link_ksettings, + .set_link_ksettings = set_link_ksettings, }; static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 00f9ee3fc3e5..88164529b52a 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1571,7 +1571,6 @@ static const struct net_device_ops bfin_mac_netdev_ops = { .ndo_set_rx_mode = bfin_mac_set_multicast_list, .ndo_do_ioctl = bfin_mac_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = bfin_mac_poll_controller, #endif diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index f8df8248035e..93def92f9997 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1290,15 +1290,6 @@ static int greth_mdio_probe(struct net_device *dev) return 0; } -static inline int phy_aneg_done(struct phy_device *phydev) -{ - int retval; - - retval = phy_read(phydev, MII_BMSR); - - return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); -} - static int greth_mdio_init(struct greth_private *greth) { int ret; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 906683851c7d..831bab352f8e 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -176,6 +176,8 @@ MODULE_DESCRIPTION("10/100/1000 Base-T Ethernet Driver for the ET1310 by Agere S #define NUM_FBRS 2 #define MAX_PACKETS_HANDLED 256 +#define ET131X_MIN_MTU 64 +#define ET131X_MAX_MTU 9216 #define ALCATEL_MULTICAST_PKT 0x01000000 #define ALCATEL_BROADCAST_PKT 0x02000000 @@ -3869,9 +3871,6 @@ static int et131x_change_mtu(struct net_device *netdev, int new_mtu) int result = 0; struct et131x_adapter *adapter = netdev_priv(netdev); - if (new_mtu < 64 || new_mtu > 9216) - return -EINVAL; - et131x_disable_txrx(netdev); netdev->mtu = new_mtu; @@ -3958,6 +3957,8 @@ static int et131x_pci_setup(struct pci_dev *pdev, netdev->watchdog_timeo = ET131X_TX_TIMEOUT; netdev->netdev_ops = &et131x_netdev_ops; + netdev->min_mtu = ET131X_MIN_MTU; + netdev->max_mtu = ET131X_MAX_MTU; SET_NETDEV_DEV(netdev, &pdev->dev); netdev->ethtool_ops = &et131x_ethtool_ops; diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 6ffdff68bfc4..af27f9dbedf2 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -773,7 +773,6 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_tx_timeout = emac_timeout, .ndo_set_rx_mode = emac_set_rx_mode, .ndo_do_ioctl = emac_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index b90a26b13fdf..16f0c70266bc 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -429,14 +429,16 @@ static const char version[] = "acenic.c: v0.92 08/05/2002 Jes Sorensen, linux-acenic@SunSITE.dk\n" " http://home.cern.ch/~jes/gige/acenic.html\n"; -static int ace_get_settings(struct net_device *, struct ethtool_cmd *); -static int ace_set_settings(struct net_device *, struct ethtool_cmd *); +static int ace_get_link_ksettings(struct net_device *, + struct ethtool_link_ksettings *); +static int ace_set_link_ksettings(struct net_device *, + const struct ethtool_link_ksettings *); static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); static const struct ethtool_ops ace_ethtool_ops = { - .get_settings = ace_get_settings, - .set_settings = ace_set_settings, .get_drvinfo = ace_get_drvinfo, + .get_link_ksettings = ace_get_link_ksettings, + .set_link_ksettings = ace_set_link_ksettings, }; static void ace_watchdog(struct net_device *dev); @@ -474,6 +476,8 @@ static int acenic_probe_one(struct pci_dev *pdev, dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->watchdog_timeo = 5*HZ; + dev->min_mtu = 0; + dev->max_mtu = ACE_JUMBO_MTU; dev->netdev_ops = &ace_netdev_ops; dev->ethtool_ops = &ace_ethtool_ops; @@ -2548,9 +2552,6 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; - if (new_mtu > ACE_JUMBO_MTU) - return -EINVAL; - writel(new_mtu + ETH_HLEN + 4, ®s->IfMtu); dev->mtu = new_mtu; @@ -2580,43 +2581,44 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int ace_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; u32 link; + u32 supported; + + memset(cmd, 0, sizeof(struct ethtool_link_ksettings)); - memset(ecmd, 0, sizeof(struct ethtool_cmd)); - ecmd->supported = - (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_FIBRE); + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_FIBRE); - ecmd->port = PORT_FIBRE; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_FIBRE; link = readl(®s->GigLnkState); - if (link & LNK_1000MB) - ethtool_cmd_speed_set(ecmd, SPEED_1000); - else { + if (link & LNK_1000MB) { + cmd->base.speed = SPEED_1000; + } else { link = readl(®s->FastLnkState); if (link & LNK_100MB) - ethtool_cmd_speed_set(ecmd, SPEED_100); + cmd->base.speed = SPEED_100; else if (link & LNK_10MB) - ethtool_cmd_speed_set(ecmd, SPEED_10); + cmd->base.speed = SPEED_10; else - ethtool_cmd_speed_set(ecmd, 0); + cmd->base.speed = 0; } if (link & LNK_FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; if (link & LNK_NEGOTIATE) - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - ecmd->autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_DISABLE; #if 0 /* @@ -2627,13 +2629,15 @@ static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) ecmd->txcoal = readl(®s->TuneTxCoalTicks); ecmd->rxcoal = readl(®s->TuneRxCoalTicks); #endif - ecmd->maxtxpkt = readl(®s->TuneMaxTxDesc); - ecmd->maxrxpkt = readl(®s->TuneMaxRxDesc); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); return 0; } -static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int ace_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; @@ -2656,11 +2660,11 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL; if (!ACE_IS_TIGON_I(ap)) link |= LNK_TX_FLOW_CTL_Y; - if (ecmd->autoneg == AUTONEG_ENABLE) + if (cmd->base.autoneg == AUTONEG_ENABLE) link |= LNK_NEGOTIATE; - if (ethtool_cmd_speed(ecmd) != speed) { + if (cmd->base.speed != speed) { link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB); - switch (ethtool_cmd_speed(ecmd)) { + switch (cmd->base.speed) { case SPEED_1000: link |= LNK_1000MB; break; @@ -2673,7 +2677,7 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) } } - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) link |= LNK_FULL_DUPLEX; if (link != ap->link) { diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h index e0052003d16f..9b640c8fbc28 100644 --- a/drivers/net/ethernet/altera/altera_tse.h +++ b/drivers/net/ethernet/altera/altera_tse.h @@ -443,7 +443,6 @@ struct altera_tse_private { /* RX/TX MAC FIFO configs */ u32 tx_fifo_depth; u32 rx_fifo_depth; - u32 max_mtu; /* Hash filter settings */ u32 hash_filter; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index bda31f308cc2..8e9208434262 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -994,20 +994,11 @@ static void tse_set_mac(struct altera_tse_private *priv, bool enable) */ static int tse_change_mtu(struct net_device *dev, int new_mtu) { - struct altera_tse_private *priv = netdev_priv(dev); - unsigned int max_mtu = priv->max_mtu; - unsigned int min_mtu = ETH_ZLEN + ETH_FCS_LEN; - if (netif_running(dev)) { netdev_err(dev, "must be stopped to change its MTU\n"); return -EBUSY; } - if ((new_mtu < min_mtu) || (new_mtu > max_mtu)) { - netdev_err(dev, "invalid MTU, max MTU is: %u\n", max_mtu); - return -EINVAL; - } - dev->mtu = new_mtu; netdev_update_features(dev); @@ -1338,11 +1329,13 @@ static int altera_tse_probe(struct platform_device *pdev) if (upper_32_bits(priv->rxdescmem_busaddr)) { dev_dbg(priv->device, "SGDMA bus addresses greater than 32-bits\n"); + ret = -EINVAL; goto err_free_netdev; } if (upper_32_bits(priv->txdescmem_busaddr)) { dev_dbg(priv->device, "SGDMA bus addresses greater than 32-bits\n"); + ret = -EINVAL; goto err_free_netdev; } } else if (priv->dmaops && @@ -1446,15 +1439,16 @@ static int altera_tse_probe(struct platform_device *pdev) of_property_read_bool(pdev->dev.of_node, "altr,has-supplementary-unicast"); + priv->dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN; /* Max MTU is 1500, ETH_DATA_LEN */ - priv->max_mtu = ETH_DATA_LEN; + priv->dev->max_mtu = ETH_DATA_LEN; /* Get the max mtu from the device tree. Note that the * "max-frame-size" parameter is actually max mtu. Definition * in the ePAPR v1.1 spec and usage differ, so go with usage. */ of_property_read_u32(pdev->dev.of_node, "max-frame-size", - &priv->max_mtu); + &priv->dev->max_mtu); /* The DMA buffer size already accounts for an alignment bias * to avoid unaligned access exceptions for the NIOS processor, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index bfeaec5bd7b9..cc8b13ebfa75 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -103,13 +103,6 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) struct ena_adapter *adapter = netdev_priv(dev); int ret; - if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { - netif_err(adapter, drv, dev, - "Invalid MTU setting. new_mtu: %d\n", new_mtu); - - return -EINVAL; - } - ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); if (!ret) { netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); @@ -2755,6 +2748,8 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter, ena_set_dev_offloads(feat, netdev); adapter->max_mtu = feat->dev_attr.max_mtu; + netdev->max_mtu = adapter->max_mtu; + netdev->min_mtu = ENA_MIN_MTU; } static int ena_rss_init_default(struct ena_adapter *adapter) @@ -3018,12 +3013,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->last_keep_alive_jiffies = jiffies; - init_timer(&adapter->timer_service); - adapter->timer_service.expires = round_jiffies(jiffies + HZ); - adapter->timer_service.function = ena_timer_service; - adapter->timer_service.data = (unsigned long)adapter; - - add_timer(&adapter->timer_service); + setup_timer(&adapter->timer_service, ena_timer_service, + (unsigned long)adapter); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", DEVICE_NAME, (long)pci_resource_start(pdev, 0), diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 0038709fd317..7ab6efbe4189 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -173,11 +173,13 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on ((OF_NET && OF_ADDRESS) || ACPI) && HAS_IOMEM && HAS_DMA - depends on ARM64 || COMPILE_TEST + depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA + depends on X86 || ARM64 || COMPILE_TEST select BITREVERSE select CRC32 select PTP_1588_CLOCK + select PHYLIB + select AMD_XGBE_HAVE_ECC if X86 ---help--- This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. @@ -195,4 +197,8 @@ config AMD_XGBE_DCB If unsure, say N. +config AMD_XGBE_HAVE_ECC + bool + default n + endif # NET_VENDOR_AMD diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index a83cd1c4ce1d..ee4b94e3cda9 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -665,7 +665,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_tx_timeout = lance_tx_timeout, .ndo_set_rx_mode = lance_set_multicast, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index fcdf5dda448f..b11e910850f7 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -663,7 +663,6 @@ static const struct net_device_ops am79c961_netdev_ops = { .ndo_set_rx_mode = am79c961_setmulticastlist, .ndo_tx_timeout = am79c961_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = am79c961_poll_controller, diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index f92cc97151ec..11cf1e3e0295 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1421,21 +1421,23 @@ static void amd8111e_get_regs(struct net_device *dev, struct ethtool_regs *regs, amd8111e_read_regs(lp, buf); } -static int amd8111e_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int amd8111e_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct amd8111e_priv *lp = netdev_priv(dev); spin_lock_irq(&lp->lock); - mii_ethtool_gset(&lp->mii_if, ecmd); + mii_ethtool_get_link_ksettings(&lp->mii_if, cmd); spin_unlock_irq(&lp->lock); return 0; } -static int amd8111e_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int amd8111e_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct amd8111e_priv *lp = netdev_priv(dev); int res; spin_lock_irq(&lp->lock); - res = mii_ethtool_sset(&lp->mii_if, ecmd); + res = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd); spin_unlock_irq(&lp->lock); return res; } @@ -1482,12 +1484,12 @@ static const struct ethtool_ops ops = { .get_drvinfo = amd8111e_get_drvinfo, .get_regs_len = amd8111e_get_regs_len, .get_regs = amd8111e_get_regs, - .get_settings = amd8111e_get_settings, - .set_settings = amd8111e_set_settings, .nway_reset = amd8111e_nway_reset, .get_link = amd8111e_get_link, .get_wol = amd8111e_get_wol, .set_wol = amd8111e_set_wol, + .get_link_ksettings = amd8111e_get_link_ksettings, + .set_link_ksettings = amd8111e_set_link_ksettings, }; /* This function handles all the ethtool ioctls. It gives driver info, @@ -1556,9 +1558,6 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu) struct amd8111e_priv *lp = netdev_priv(dev); int err; - if ((new_mtu < AMD8111E_MIN_MTU) || (new_mtu > AMD8111E_MAX_MTU)) - return -EINVAL; - if (!netif_running(dev)) { /* new_mtu will be used * when device starts netxt time @@ -1874,6 +1873,8 @@ static int amd8111e_probe_one(struct pci_dev *pdev, dev->ethtool_ops = &ops; dev->irq =pdev->irq; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; + dev->min_mtu = AMD8111E_MIN_MTU; + dev->max_mtu = AMD8111E_MAX_MTU; netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); #if AMD8111E_VLAN_TAG_USED diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 968b7bfac8fc..5fd7b15b0574 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -706,7 +706,6 @@ static const struct net_device_ops ariadne_netdev_ops = { .ndo_get_stats = ariadne_get_stats, .ndo_set_rx_mode = set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index d2bc8e5dcd23..e53ccc3b7d8d 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -460,7 +460,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_set_mac_address = lance_set_mac_address, .ndo_tx_timeout = lance_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static unsigned long __init lance_probe1( struct net_device *dev, diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index df664187cd82..a3c90fe5de00 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1103,7 +1103,6 @@ static const struct net_device_ops au1000_netdev_ops = { .ndo_tx_timeout = au1000_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int au1000_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index b799c7ac899b..76e5fc7adff5 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1013,7 +1013,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_start_xmit = lance_start_xmit, .ndo_tx_timeout = lance_tx_timeout, .ndo_set_rx_mode = lance_set_multicast, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c index 6c9de117ffc6..c3dbf1c8a269 100644 --- a/drivers/net/ethernet/amd/hplance.c +++ b/drivers/net/ethernet/amd/hplance.c @@ -72,7 +72,6 @@ static const struct net_device_ops hplance_netdev_ops = { .ndo_stop = hplance_close, .ndo_start_xmit = lance_start_xmit, .ndo_set_rx_mode = lance_set_multicast, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index abb1ba228b26..61a641f23149 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -461,7 +461,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_get_stats = lance_get_stats, .ndo_set_rx_mode = set_multicast_list, .ndo_tx_timeout = lance_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 0660ac5846bb..0a920448522f 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -62,7 +62,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_start_xmit = lance_start_xmit, .ndo_set_rx_mode = lance_set_multicast, .ndo_tx_timeout = lance_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index cda53db75f17..5985bf220a8d 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -407,7 +407,6 @@ static const struct net_device_ops ni65_netdev_ops = { .ndo_start_xmit = ni65_send_packet, .ndo_tx_timeout = ni65_timeout, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 2807e181647b..113a3b3cc50c 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -427,7 +427,6 @@ static const struct net_device_ops mace_netdev_ops = { .ndo_set_config = mace_config, .ndo_get_stats = mace_get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index c22bf52d3320..41e58cca8fee 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -677,7 +677,8 @@ static void pcnet32_poll_controller(struct net_device *dev) } #endif -static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int pcnet32_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long flags; @@ -685,14 +686,15 @@ static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->mii) { spin_lock_irqsave(&lp->lock, flags); - mii_ethtool_gset(&lp->mii_if, cmd); + mii_ethtool_get_link_ksettings(&lp->mii_if, cmd); spin_unlock_irqrestore(&lp->lock, flags); r = 0; } return r; } -static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int pcnet32_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long flags; @@ -700,7 +702,7 @@ static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->mii) { spin_lock_irqsave(&lp->lock, flags); - r = mii_ethtool_sset(&lp->mii_if, cmd); + r = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd); spin_unlock_irqrestore(&lp->lock, flags); } return r; @@ -1440,8 +1442,6 @@ static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, } static const struct ethtool_ops pcnet32_ethtool_ops = { - .get_settings = pcnet32_get_settings, - .set_settings = pcnet32_set_settings, .get_drvinfo = pcnet32_get_drvinfo, .get_msglevel = pcnet32_get_msglevel, .set_msglevel = pcnet32_set_msglevel, @@ -1455,6 +1455,8 @@ static const struct ethtool_ops pcnet32_ethtool_ops = { .get_regs_len = pcnet32_get_regs_len, .get_regs = pcnet32_get_regs, .get_sset_count = pcnet32_get_sset_count, + .get_link_ksettings = pcnet32_get_link_ksettings, + .set_link_ksettings = pcnet32_set_link_ksettings, }; /* only probes for non-PCI devices, the rest are handled by @@ -1527,7 +1529,6 @@ static const struct net_device_ops pcnet32_netdev_ops = { .ndo_get_stats = pcnet32_get_stats, .ndo_set_rx_mode = pcnet32_set_multicast_list, .ndo_do_ioctl = pcnet32_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index 3d8c6b2cdea4..12bb4f1489fc 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -299,7 +299,6 @@ static const struct net_device_ops lance_netdev_ops = { .ndo_start_xmit = lance_start_xmit, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = NULL, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 9b56b40259dc..291ca5187f12 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1294,7 +1294,6 @@ static const struct net_device_ops sparc_lance_ops = { .ndo_start_xmit = lance_start_xmit, .ndo_set_rx_mode = lance_set_multicast, .ndo_tx_timeout = lance_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile index 171a7e68048d..0dea8f5da899 100644 --- a/drivers/net/ethernet/amd/xgbe/Makefile +++ b/drivers/net/ethernet/amd/xgbe/Makefile @@ -2,7 +2,10 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \ xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \ - xgbe-ptp.o + xgbe-ptp.o \ + xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \ + xgbe-platform.o +amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index bbef95973c27..5b7ba25e0065 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -159,6 +159,8 @@ #define DMA_ISR_MACIS_WIDTH 1 #define DMA_ISR_MTLIS_INDEX 16 #define DMA_ISR_MTLIS_WIDTH 1 +#define DMA_MR_INTM_INDEX 12 +#define DMA_MR_INTM_WIDTH 2 #define DMA_MR_SWR_INDEX 0 #define DMA_MR_SWR_WIDTH 1 #define DMA_SBMR_EAME_INDEX 11 @@ -309,6 +311,11 @@ #define MAC_HWF0R 0x011c #define MAC_HWF1R 0x0120 #define MAC_HWF2R 0x0124 +#define MAC_MDIOSCAR 0x0200 +#define MAC_MDIOSCCDR 0x0204 +#define MAC_MDIOISR 0x0214 +#define MAC_MDIOIER 0x0218 +#define MAC_MDIOCL22R 0x0220 #define MAC_GPIOCR 0x0278 #define MAC_GPIOSR 0x027c #define MAC_MACA0HR 0x0300 @@ -409,10 +416,34 @@ #define MAC_ISR_MMCTXIS_WIDTH 1 #define MAC_ISR_PMTIS_INDEX 4 #define MAC_ISR_PMTIS_WIDTH 1 +#define MAC_ISR_SMI_INDEX 1 +#define MAC_ISR_SMI_WIDTH 1 #define MAC_ISR_TSIS_INDEX 12 #define MAC_ISR_TSIS_WIDTH 1 #define MAC_MACA1HR_AE_INDEX 31 #define MAC_MACA1HR_AE_WIDTH 1 +#define MAC_MDIOIER_SNGLCOMPIE_INDEX 12 +#define MAC_MDIOIER_SNGLCOMPIE_WIDTH 1 +#define MAC_MDIOISR_SNGLCOMPINT_INDEX 12 +#define MAC_MDIOISR_SNGLCOMPINT_WIDTH 1 +#define MAC_MDIOSCAR_DA_INDEX 21 +#define MAC_MDIOSCAR_DA_WIDTH 5 +#define MAC_MDIOSCAR_PA_INDEX 16 +#define MAC_MDIOSCAR_PA_WIDTH 5 +#define MAC_MDIOSCAR_RA_INDEX 0 +#define MAC_MDIOSCAR_RA_WIDTH 16 +#define MAC_MDIOSCAR_REG_INDEX 0 +#define MAC_MDIOSCAR_REG_WIDTH 21 +#define MAC_MDIOSCCDR_BUSY_INDEX 22 +#define MAC_MDIOSCCDR_BUSY_WIDTH 1 +#define MAC_MDIOSCCDR_CMD_INDEX 16 +#define MAC_MDIOSCCDR_CMD_WIDTH 2 +#define MAC_MDIOSCCDR_CR_INDEX 19 +#define MAC_MDIOSCCDR_CR_WIDTH 3 +#define MAC_MDIOSCCDR_DATA_INDEX 0 +#define MAC_MDIOSCCDR_DATA_WIDTH 16 +#define MAC_MDIOSCCDR_SADDR_INDEX 18 +#define MAC_MDIOSCCDR_SADDR_WIDTH 1 #define MAC_PFR_HMC_INDEX 2 #define MAC_PFR_HMC_WIDTH 1 #define MAC_PFR_HPF_INDEX 10 @@ -790,6 +821,10 @@ #define MTL_Q_RQOMR_RSF_WIDTH 1 #define MTL_Q_RQOMR_RTC_INDEX 0 #define MTL_Q_RQOMR_RTC_WIDTH 2 +#define MTL_Q_TQDR_TRCSTS_INDEX 1 +#define MTL_Q_TQDR_TRCSTS_WIDTH 2 +#define MTL_Q_TQDR_TXQSTS_INDEX 4 +#define MTL_Q_TQDR_TXQSTS_WIDTH 1 #define MTL_Q_TQOMR_FTQ_INDEX 0 #define MTL_Q_TQOMR_FTQ_WIDTH 1 #define MTL_Q_TQOMR_Q2TCMAP_INDEX 8 @@ -852,14 +887,16 @@ #define MTL_TSA_SP 0x00 #define MTL_TSA_ETS 0x02 -/* PCS MMD select register offset - * The MMD select register is used for accessing PCS registers - * when the underlying APB3 interface is using indirect addressing. - * Indirect addressing requires accessing registers in two phases, - * an address phase and a data phase. The address phases requires - * writing an address selection value to the MMD select regiesters. - */ -#define PCS_MMD_SELECT 0xff +/* PCS register offsets */ +#define PCS_V1_WINDOW_SELECT 0x03fc +#define PCS_V2_WINDOW_DEF 0x9060 +#define PCS_V2_WINDOW_SELECT 0x9064 + +/* PCS register entry bit positions and sizes */ +#define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6 +#define PCS_V2_WINDOW_DEF_OFFSET_WIDTH 14 +#define PCS_V2_WINDOW_DEF_SIZE_INDEX 2 +#define PCS_V2_WINDOW_DEF_SIZE_WIDTH 4 /* SerDes integration register offsets */ #define SIR0_KR_RT_1 0x002c @@ -903,6 +940,198 @@ #define RXTX_REG129_RXDFE_CONFIG_INDEX 14 #define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 +/* MAC Control register offsets */ +#define XP_PROP_0 0x0000 +#define XP_PROP_1 0x0004 +#define XP_PROP_2 0x0008 +#define XP_PROP_3 0x000c +#define XP_PROP_4 0x0010 +#define XP_PROP_5 0x0014 +#define XP_MAC_ADDR_LO 0x0020 +#define XP_MAC_ADDR_HI 0x0024 +#define XP_ECC_ISR 0x0030 +#define XP_ECC_IER 0x0034 +#define XP_ECC_CNT0 0x003c +#define XP_ECC_CNT1 0x0040 +#define XP_DRIVER_INT_REQ 0x0060 +#define XP_DRIVER_INT_RO 0x0064 +#define XP_DRIVER_SCRATCH_0 0x0068 +#define XP_DRIVER_SCRATCH_1 0x006c +#define XP_INT_EN 0x0078 +#define XP_I2C_MUTEX 0x0080 +#define XP_MDIO_MUTEX 0x0084 + +/* MAC Control register entry bit positions and sizes */ +#define XP_DRIVER_INT_REQ_REQUEST_INDEX 0 +#define XP_DRIVER_INT_REQ_REQUEST_WIDTH 1 +#define XP_DRIVER_INT_RO_STATUS_INDEX 0 +#define XP_DRIVER_INT_RO_STATUS_WIDTH 1 +#define XP_DRIVER_SCRATCH_0_COMMAND_INDEX 0 +#define XP_DRIVER_SCRATCH_0_COMMAND_WIDTH 8 +#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_INDEX 8 +#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_WIDTH 8 +#define XP_ECC_CNT0_RX_DED_INDEX 24 +#define XP_ECC_CNT0_RX_DED_WIDTH 8 +#define XP_ECC_CNT0_RX_SEC_INDEX 16 +#define XP_ECC_CNT0_RX_SEC_WIDTH 8 +#define XP_ECC_CNT0_TX_DED_INDEX 8 +#define XP_ECC_CNT0_TX_DED_WIDTH 8 +#define XP_ECC_CNT0_TX_SEC_INDEX 0 +#define XP_ECC_CNT0_TX_SEC_WIDTH 8 +#define XP_ECC_CNT1_DESC_DED_INDEX 8 +#define XP_ECC_CNT1_DESC_DED_WIDTH 8 +#define XP_ECC_CNT1_DESC_SEC_INDEX 0 +#define XP_ECC_CNT1_DESC_SEC_WIDTH 8 +#define XP_ECC_IER_DESC_DED_INDEX 0 +#define XP_ECC_IER_DESC_DED_WIDTH 1 +#define XP_ECC_IER_DESC_SEC_INDEX 1 +#define XP_ECC_IER_DESC_SEC_WIDTH 1 +#define XP_ECC_IER_RX_DED_INDEX 2 +#define XP_ECC_IER_RX_DED_WIDTH 1 +#define XP_ECC_IER_RX_SEC_INDEX 3 +#define XP_ECC_IER_RX_SEC_WIDTH 1 +#define XP_ECC_IER_TX_DED_INDEX 4 +#define XP_ECC_IER_TX_DED_WIDTH 1 +#define XP_ECC_IER_TX_SEC_INDEX 5 +#define XP_ECC_IER_TX_SEC_WIDTH 1 +#define XP_ECC_ISR_DESC_DED_INDEX 0 +#define XP_ECC_ISR_DESC_DED_WIDTH 1 +#define XP_ECC_ISR_DESC_SEC_INDEX 1 +#define XP_ECC_ISR_DESC_SEC_WIDTH 1 +#define XP_ECC_ISR_RX_DED_INDEX 2 +#define XP_ECC_ISR_RX_DED_WIDTH 1 +#define XP_ECC_ISR_RX_SEC_INDEX 3 +#define XP_ECC_ISR_RX_SEC_WIDTH 1 +#define XP_ECC_ISR_TX_DED_INDEX 4 +#define XP_ECC_ISR_TX_DED_WIDTH 1 +#define XP_ECC_ISR_TX_SEC_INDEX 5 +#define XP_ECC_ISR_TX_SEC_WIDTH 1 +#define XP_I2C_MUTEX_BUSY_INDEX 31 +#define XP_I2C_MUTEX_BUSY_WIDTH 1 +#define XP_I2C_MUTEX_ID_INDEX 29 +#define XP_I2C_MUTEX_ID_WIDTH 2 +#define XP_I2C_MUTEX_ACTIVE_INDEX 0 +#define XP_I2C_MUTEX_ACTIVE_WIDTH 1 +#define XP_MAC_ADDR_HI_VALID_INDEX 31 +#define XP_MAC_ADDR_HI_VALID_WIDTH 1 +#define XP_PROP_0_CONN_TYPE_INDEX 28 +#define XP_PROP_0_CONN_TYPE_WIDTH 3 +#define XP_PROP_0_MDIO_ADDR_INDEX 16 +#define XP_PROP_0_MDIO_ADDR_WIDTH 5 +#define XP_PROP_0_PORT_ID_INDEX 0 +#define XP_PROP_0_PORT_ID_WIDTH 8 +#define XP_PROP_0_PORT_MODE_INDEX 8 +#define XP_PROP_0_PORT_MODE_WIDTH 4 +#define XP_PROP_0_PORT_SPEEDS_INDEX 23 +#define XP_PROP_0_PORT_SPEEDS_WIDTH 4 +#define XP_PROP_1_MAX_RX_DMA_INDEX 24 +#define XP_PROP_1_MAX_RX_DMA_WIDTH 5 +#define XP_PROP_1_MAX_RX_QUEUES_INDEX 8 +#define XP_PROP_1_MAX_RX_QUEUES_WIDTH 5 +#define XP_PROP_1_MAX_TX_DMA_INDEX 16 +#define XP_PROP_1_MAX_TX_DMA_WIDTH 5 +#define XP_PROP_1_MAX_TX_QUEUES_INDEX 0 +#define XP_PROP_1_MAX_TX_QUEUES_WIDTH 5 +#define XP_PROP_2_RX_FIFO_SIZE_INDEX 16 +#define XP_PROP_2_RX_FIFO_SIZE_WIDTH 16 +#define XP_PROP_2_TX_FIFO_SIZE_INDEX 0 +#define XP_PROP_2_TX_FIFO_SIZE_WIDTH 16 +#define XP_PROP_3_GPIO_MASK_INDEX 28 +#define XP_PROP_3_GPIO_MASK_WIDTH 4 +#define XP_PROP_3_GPIO_MOD_ABS_INDEX 20 +#define XP_PROP_3_GPIO_MOD_ABS_WIDTH 4 +#define XP_PROP_3_GPIO_RATE_SELECT_INDEX 16 +#define XP_PROP_3_GPIO_RATE_SELECT_WIDTH 4 +#define XP_PROP_3_GPIO_RX_LOS_INDEX 24 +#define XP_PROP_3_GPIO_RX_LOS_WIDTH 4 +#define XP_PROP_3_GPIO_TX_FAULT_INDEX 12 +#define XP_PROP_3_GPIO_TX_FAULT_WIDTH 4 +#define XP_PROP_3_GPIO_ADDR_INDEX 8 +#define XP_PROP_3_GPIO_ADDR_WIDTH 3 +#define XP_PROP_3_MDIO_RESET_INDEX 0 +#define XP_PROP_3_MDIO_RESET_WIDTH 2 +#define XP_PROP_3_MDIO_RESET_I2C_ADDR_INDEX 8 +#define XP_PROP_3_MDIO_RESET_I2C_ADDR_WIDTH 3 +#define XP_PROP_3_MDIO_RESET_I2C_GPIO_INDEX 12 +#define XP_PROP_3_MDIO_RESET_I2C_GPIO_WIDTH 4 +#define XP_PROP_3_MDIO_RESET_INT_GPIO_INDEX 4 +#define XP_PROP_3_MDIO_RESET_INT_GPIO_WIDTH 2 +#define XP_PROP_4_MUX_ADDR_HI_INDEX 8 +#define XP_PROP_4_MUX_ADDR_HI_WIDTH 5 +#define XP_PROP_4_MUX_ADDR_LO_INDEX 0 +#define XP_PROP_4_MUX_ADDR_LO_WIDTH 3 +#define XP_PROP_4_MUX_CHAN_INDEX 4 +#define XP_PROP_4_MUX_CHAN_WIDTH 3 +#define XP_PROP_4_REDRV_ADDR_INDEX 16 +#define XP_PROP_4_REDRV_ADDR_WIDTH 7 +#define XP_PROP_4_REDRV_IF_INDEX 23 +#define XP_PROP_4_REDRV_IF_WIDTH 1 +#define XP_PROP_4_REDRV_LANE_INDEX 24 +#define XP_PROP_4_REDRV_LANE_WIDTH 3 +#define XP_PROP_4_REDRV_MODEL_INDEX 28 +#define XP_PROP_4_REDRV_MODEL_WIDTH 3 +#define XP_PROP_4_REDRV_PRESENT_INDEX 31 +#define XP_PROP_4_REDRV_PRESENT_WIDTH 1 + +/* I2C Control register offsets */ +#define IC_CON 0x0000 +#define IC_TAR 0x0004 +#define IC_DATA_CMD 0x0010 +#define IC_INTR_STAT 0x002c +#define IC_INTR_MASK 0x0030 +#define IC_RAW_INTR_STAT 0x0034 +#define IC_CLR_INTR 0x0040 +#define IC_CLR_TX_ABRT 0x0054 +#define IC_CLR_STOP_DET 0x0060 +#define IC_ENABLE 0x006c +#define IC_TXFLR 0x0074 +#define IC_RXFLR 0x0078 +#define IC_TX_ABRT_SOURCE 0x0080 +#define IC_ENABLE_STATUS 0x009c +#define IC_COMP_PARAM_1 0x00f4 + +/* I2C Control register entry bit positions and sizes */ +#define IC_COMP_PARAM_1_MAX_SPEED_MODE_INDEX 2 +#define IC_COMP_PARAM_1_MAX_SPEED_MODE_WIDTH 2 +#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_INDEX 8 +#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_WIDTH 8 +#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_INDEX 16 +#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_WIDTH 8 +#define IC_CON_MASTER_MODE_INDEX 0 +#define IC_CON_MASTER_MODE_WIDTH 1 +#define IC_CON_RESTART_EN_INDEX 5 +#define IC_CON_RESTART_EN_WIDTH 1 +#define IC_CON_RX_FIFO_FULL_HOLD_INDEX 9 +#define IC_CON_RX_FIFO_FULL_HOLD_WIDTH 1 +#define IC_CON_SLAVE_DISABLE_INDEX 6 +#define IC_CON_SLAVE_DISABLE_WIDTH 1 +#define IC_CON_SPEED_INDEX 1 +#define IC_CON_SPEED_WIDTH 2 +#define IC_DATA_CMD_CMD_INDEX 8 +#define IC_DATA_CMD_CMD_WIDTH 1 +#define IC_DATA_CMD_STOP_INDEX 9 +#define IC_DATA_CMD_STOP_WIDTH 1 +#define IC_ENABLE_ABORT_INDEX 1 +#define IC_ENABLE_ABORT_WIDTH 1 +#define IC_ENABLE_EN_INDEX 0 +#define IC_ENABLE_EN_WIDTH 1 +#define IC_ENABLE_STATUS_EN_INDEX 0 +#define IC_ENABLE_STATUS_EN_WIDTH 1 +#define IC_INTR_MASK_TX_EMPTY_INDEX 4 +#define IC_INTR_MASK_TX_EMPTY_WIDTH 1 +#define IC_RAW_INTR_STAT_RX_FULL_INDEX 2 +#define IC_RAW_INTR_STAT_RX_FULL_WIDTH 1 +#define IC_RAW_INTR_STAT_STOP_DET_INDEX 9 +#define IC_RAW_INTR_STAT_STOP_DET_WIDTH 1 +#define IC_RAW_INTR_STAT_TX_ABRT_INDEX 6 +#define IC_RAW_INTR_STAT_TX_ABRT_WIDTH 1 +#define IC_RAW_INTR_STAT_TX_EMPTY_INDEX 4 +#define IC_RAW_INTR_STAT_TX_EMPTY_WIDTH 1 + +/* I2C Control register value */ +#define IC_TX_ABRT_7B_ADDR_NOACK 0x0001 +#define IC_TX_ABRT_ARB_LOST 0x1000 + /* Descriptor/Packet entry bit positions and sizes */ #define RX_PACKET_ERRORS_CRC_INDEX 2 #define RX_PACKET_ERRORS_CRC_WIDTH 1 @@ -1027,6 +1256,10 @@ #define MDIO_PMA_10GBR_FECCTRL 0x00ab #endif +#ifndef MDIO_PCS_DIG_CTRL +#define MDIO_PCS_DIG_CTRL 0x8000 +#endif + #ifndef MDIO_AN_XNP #define MDIO_AN_XNP 0x0016 #endif @@ -1047,11 +1280,48 @@ #define MDIO_AN_INT 0x8002 #endif +#ifndef MDIO_VEND2_AN_ADVERTISE +#define MDIO_VEND2_AN_ADVERTISE 0x0004 +#endif + +#ifndef MDIO_VEND2_AN_LP_ABILITY +#define MDIO_VEND2_AN_LP_ABILITY 0x0005 +#endif + +#ifndef MDIO_VEND2_AN_CTRL +#define MDIO_VEND2_AN_CTRL 0x8001 +#endif + +#ifndef MDIO_VEND2_AN_STAT +#define MDIO_VEND2_AN_STAT 0x8002 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif +#ifndef MDIO_VEND2_CTRL1_AN_ENABLE +#define MDIO_VEND2_CTRL1_AN_ENABLE BIT(12) +#endif + +#ifndef MDIO_VEND2_CTRL1_AN_RESTART +#define MDIO_VEND2_CTRL1_AN_RESTART BIT(9) +#endif + +#ifndef MDIO_VEND2_CTRL1_SS6 +#define MDIO_VEND2_CTRL1_SS6 BIT(6) +#endif + +#ifndef MDIO_VEND2_CTRL1_SS13 +#define MDIO_VEND2_CTRL1_SS13 BIT(13) +#endif + /* MDIO mask values */ +#define XGBE_AN_CL73_INT_CMPLT BIT(0) +#define XGBE_AN_CL73_INC_LINK BIT(1) +#define XGBE_AN_CL73_PG_RCV BIT(2) +#define XGBE_AN_CL73_INT_MASK 0x07 + #define XGBE_XNP_MCF_NULL_MESSAGE 0x001 #define XGBE_XNP_ACK_PROCESSED BIT(12) #define XGBE_XNP_MP_FORMATTED BIT(13) @@ -1060,6 +1330,19 @@ #define XGBE_KR_TRAINING_START BIT(0) #define XGBE_KR_TRAINING_ENABLE BIT(1) +#define XGBE_PCS_CL37_BP BIT(12) + +#define XGBE_AN_CL37_INT_CMPLT BIT(0) +#define XGBE_AN_CL37_INT_MASK 0x01 + +#define XGBE_AN_CL37_HD_MASK 0x40 +#define XGBE_AN_CL37_FD_MASK 0x20 + +#define XGBE_AN_CL37_PCS_MODE_MASK 0x06 +#define XGBE_AN_CL37_PCS_MODE_BASEX 0x00 +#define XGBE_AN_CL37_PCS_MODE_SGMII 0x04 +#define XGBE_AN_CL37_TX_CONFIG_MASK 0x08 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable @@ -1195,12 +1478,28 @@ do { \ /* Macros for building, reading or writing register values or bits * within the register values of XPCS registers. */ -#define XPCS_IOWRITE(_pdata, _off, _val) \ +#define XPCS_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XPCS_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XPCS32_IOWRITE(_pdata, _off, _val) \ iowrite32(_val, (_pdata)->xpcs_regs + (_off)) -#define XPCS_IOREAD(_pdata, _off) \ +#define XPCS32_IOREAD(_pdata, _off) \ ioread32((_pdata)->xpcs_regs + (_off)) +#define XPCS16_IOWRITE(_pdata, _off, _val) \ + iowrite16(_val, (_pdata)->xpcs_regs + (_off)) + +#define XPCS16_IOREAD(_pdata, _off) \ + ioread16((_pdata)->xpcs_regs + (_off)) + /* Macros for building, reading or writing register values or bits * within the register values of SerDes integration registers. */ @@ -1278,6 +1577,72 @@ do { \ } while (0) /* Macros for building, reading or writing register values or bits + * within the register values of MAC Control registers. + */ +#define XP_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XP_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XP_IOREAD(_pdata, _reg) \ + ioread32((_pdata)->xprop_regs + (_reg)) + +#define XP_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XP_IOREAD((_pdata), (_reg)), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XP_IOWRITE(_pdata, _reg, _val) \ + iowrite32((_val), (_pdata)->xprop_regs + (_reg)) + +#define XP_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u32 reg_val = XP_IOREAD((_pdata), (_reg)); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XP_IOWRITE((_pdata), (_reg), reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits + * within the register values of I2C Control registers. + */ +#define XI2C_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XI2C_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XI2C_IOREAD(_pdata, _reg) \ + ioread32((_pdata)->xi2c_regs + (_reg)) + +#define XI2C_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XI2C_IOREAD((_pdata), (_reg)), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XI2C_IOWRITE(_pdata, _reg, _val) \ + iowrite32((_val), (_pdata)->xi2c_regs + (_reg)) + +#define XI2C_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u32 reg_val = XI2C_IOREAD((_pdata), (_reg)); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XI2C_IOWRITE((_pdata), (_reg), reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits * using MDIO. Different from above because of the use of standardized * Linux include values. No shifting is performed with the bit * operations, everything works on mask values. diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 96f485ab612e..0c0140decbc2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -316,6 +316,126 @@ static const struct file_operations xpcs_reg_value_fops = { .write = xpcs_reg_value_write, }; +static ssize_t xprop_reg_addr_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xprop_reg); +} + +static ssize_t xprop_reg_addr_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_write(buffer, count, ppos, + &pdata->debugfs_xprop_reg); +} + +static ssize_t xprop_reg_value_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + + value = XP_IOREAD(pdata, pdata->debugfs_xprop_reg); + + return xgbe_common_read(buffer, count, ppos, value); +} + +static ssize_t xprop_reg_value_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + ssize_t len; + + len = xgbe_common_write(buffer, count, ppos, &value); + if (len < 0) + return len; + + XP_IOWRITE(pdata, pdata->debugfs_xprop_reg, value); + + return len; +} + +static const struct file_operations xprop_reg_addr_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xprop_reg_addr_read, + .write = xprop_reg_addr_write, +}; + +static const struct file_operations xprop_reg_value_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xprop_reg_value_read, + .write = xprop_reg_value_write, +}; + +static ssize_t xi2c_reg_addr_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xi2c_reg); +} + +static ssize_t xi2c_reg_addr_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_write(buffer, count, ppos, + &pdata->debugfs_xi2c_reg); +} + +static ssize_t xi2c_reg_value_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + + value = XI2C_IOREAD(pdata, pdata->debugfs_xi2c_reg); + + return xgbe_common_read(buffer, count, ppos, value); +} + +static ssize_t xi2c_reg_value_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + ssize_t len; + + len = xgbe_common_write(buffer, count, ppos, &value); + if (len < 0) + return len; + + XI2C_IOWRITE(pdata, pdata->debugfs_xi2c_reg, value); + + return len; +} + +static const struct file_operations xi2c_reg_addr_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xi2c_reg_addr_read, + .write = xi2c_reg_addr_write, +}; + +static const struct file_operations xi2c_reg_value_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xi2c_reg_value_read, + .write = xi2c_reg_value_write, +}; + void xgbe_debugfs_init(struct xgbe_prv_data *pdata) { struct dentry *pfile; @@ -367,6 +487,38 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) if (!pfile) netdev_err(pdata->netdev, "debugfs_create_file failed\n"); + if (pdata->xprop_regs) { + pfile = debugfs_create_file("xprop_register", 0600, + pdata->xgbe_debugfs, pdata, + &xprop_reg_addr_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + + pfile = debugfs_create_file("xprop_register_value", 0600, + pdata->xgbe_debugfs, pdata, + &xprop_reg_value_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + } + + if (pdata->xi2c_regs) { + pfile = debugfs_create_file("xi2c_register", 0600, + pdata->xgbe_debugfs, pdata, + &xi2c_reg_addr_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + + pfile = debugfs_create_file("xi2c_register_value", 0600, + pdata->xgbe_debugfs, pdata, + &xi2c_reg_value_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + } + kfree(buf); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 1babcc11a248..30056e24e1fc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -123,6 +123,11 @@ #include "xgbe.h" #include "xgbe-common.h" +static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata) +{ + return pdata->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; +} + static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata, unsigned int usec) { @@ -491,6 +496,27 @@ static void xgbe_config_rss(struct xgbe_prv_data *pdata) "error configuring RSS, RSS disabled\n"); } +static bool xgbe_is_pfc_queue(struct xgbe_prv_data *pdata, + unsigned int queue) +{ + unsigned int prio, tc; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + /* Does this queue handle the priority? */ + if (pdata->prio2q_map[prio] != queue) + continue; + + /* Get the Traffic Class for this priority */ + tc = pdata->ets->prio_tc[prio]; + + /* Check if PFC is enabled for this traffic class */ + if (pdata->pfc->pfc_en & (1 << tc)) + return true; + } + + return false; +} + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { unsigned int max_q_count, q_count; @@ -528,27 +554,14 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) for (i = 0; i < pdata->rx_q_count; i++) { unsigned int ehfc = 0; - if (pfc && ets) { - unsigned int prio; - - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { - unsigned int tc; - - /* Does this queue handle the priority? */ - if (pdata->prio2q_map[prio] != i) - continue; - - /* Get the Traffic Class for this priority */ - tc = ets->prio_tc[prio]; - - /* Check if flow control should be enabled */ - if (pfc->pfc_en & (1 << tc)) { + if (pdata->rx_rfd[i]) { + /* Flow control thresholds are established */ + if (pfc && ets) { + if (xgbe_is_pfc_queue(pdata, i)) ehfc = 1; - break; - } + } else { + ehfc = 1; } - } else { - ehfc = 1; } XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc); @@ -633,6 +646,11 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) unsigned int dma_ch_isr, dma_ch_ier; unsigned int i; + /* Set the interrupt mode if supported */ + if (pdata->channel_irq_mode) + XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM, + pdata->channel_irq_mode); + channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { /* Clear all the interrupts which are set */ @@ -654,19 +672,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) if (channel->tx_ring) { /* Enable the following Tx interrupts * TIE - Transmit Interrupt Enable (unless using - * per channel interrupts) + * per channel interrupts in edge triggered + * mode) */ - if (!pdata->per_channel_irq) + if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1); } if (channel->rx_ring) { /* Enable following Rx interrupts * RBUE - Receive Buffer Unavailable Enable * RIE - Receive Interrupt Enable (unless using - * per channel interrupts) + * per channel interrupts in edge triggered + * mode) */ XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1); - if (!pdata->per_channel_irq) + if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1); } @@ -702,34 +722,90 @@ static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata) /* Enable all counter interrupts */ XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xffffffff); XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff); + + /* Enable MDIO single command completion interrupt */ + XGMAC_IOWRITE_BITS(pdata, MAC_MDIOIER, SNGLCOMPIE, 1); } -static int xgbe_set_gmii_speed(struct xgbe_prv_data *pdata) +static void xgbe_enable_ecc_interrupts(struct xgbe_prv_data *pdata) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x3) - return 0; + unsigned int ecc_isr, ecc_ier = 0; - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x3); + if (!pdata->vdata->ecc_support) + return; - return 0; + /* Clear all the interrupts which are set */ + ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR); + XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr); + + /* Enable ECC interrupts */ + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 1); + + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); } -static int xgbe_set_gmii_2500_speed(struct xgbe_prv_data *pdata) +static void xgbe_disable_ecc_ded(struct xgbe_prv_data *pdata) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x2) - return 0; + unsigned int ecc_ier; - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x2); + ecc_ier = XP_IOREAD(pdata, XP_ECC_IER); - return 0; + /* Disable ECC DED interrupts */ + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 0); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 0); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 0); + + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); } -static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata) +static void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata, + enum xgbe_ecc_sec sec) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0) - return 0; + unsigned int ecc_ier; + + ecc_ier = XP_IOREAD(pdata, XP_ECC_IER); + + /* Disable ECC SEC interrupt */ + switch (sec) { + case XGBE_ECC_SEC_TX: + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 0); + break; + case XGBE_ECC_SEC_RX: + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 0); + break; + case XGBE_ECC_SEC_DESC: + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 0); + break; + } - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0); + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); +} + +static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) +{ + unsigned int ss; + + switch (speed) { + case SPEED_1000: + ss = 0x03; + break; + case SPEED_2500: + ss = 0x02; + break; + case SPEED_10000: + ss = 0x00; + break; + default: + return -EINVAL; + } + + if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss) + XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss); return 0; } @@ -1019,8 +1095,101 @@ static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata) return 0; } -static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, - int mmd_reg) +static int xgbe_clr_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) +{ + unsigned int reg; + + if (gpio > 16) + return -EINVAL; + + reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); + + reg &= ~(1 << (gpio + 16)); + XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); + + return 0; +} + +static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) +{ + unsigned int reg; + + if (gpio > 16) + return -EINVAL; + + reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); + + reg |= (1 << (gpio + 16)); + XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); + + return 0; +} + +static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + unsigned long flags; + unsigned int mmd_address, index, offset; + int mmd_data; + + if (mmd_reg & MII_ADDR_C45) + mmd_address = mmd_reg & ~MII_ADDR_C45; + else + mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + + /* The PCS registers are accessed using mmio. The underlying + * management interface uses indirect addressing to access the MMD + * register sets. This requires accessing of the PCS register in two + * phases, an address phase and a data phase. + * + * The mmio interface is based on 16-bit offsets and values. All + * register offsets must therefore be adjusted by left shifting the + * offset 1 bit and reading 16 bits of data. + */ + mmd_address <<= 1; + index = mmd_address & ~pdata->xpcs_window_mask; + offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + + spin_lock_irqsave(&pdata->xpcs_lock, flags); + XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + mmd_data = XPCS16_IOREAD(pdata, offset); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); + + return mmd_data; +} + +static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) +{ + unsigned long flags; + unsigned int mmd_address, index, offset; + + if (mmd_reg & MII_ADDR_C45) + mmd_address = mmd_reg & ~MII_ADDR_C45; + else + mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + + /* The PCS registers are accessed using mmio. The underlying + * management interface uses indirect addressing to access the MMD + * register sets. This requires accessing of the PCS register in two + * phases, an address phase and a data phase. + * + * The mmio interface is based on 16-bit offsets and values. All + * register offsets must therefore be adjusted by left shifting the + * offset 1 bit and writing 16 bits of data. + */ + mmd_address <<= 1; + index = mmd_address & ~pdata->xpcs_window_mask; + offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + + spin_lock_irqsave(&pdata->xpcs_lock, flags); + XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + XPCS16_IOWRITE(pdata, offset, mmd_data); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); +} + +static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) { unsigned long flags; unsigned int mmd_address; @@ -1041,15 +1210,15 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * offset 2 bits and reading 32 bits of data. */ spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); - mmd_data = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2); + XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); + mmd_data = XPCS32_IOREAD(pdata, (mmd_address & 0xff) << 2); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); return mmd_data; } -static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, - int mmd_reg, int mmd_data) +static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) { unsigned int mmd_address; unsigned long flags; @@ -1066,14 +1235,113 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * * The mmio interface is based on 32-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the - * offset 2 bits and reading 32 bits of data. + * offset 2 bits and writing 32 bits of data. */ spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); - XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); + XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); + XPCS32_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } +static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + switch (pdata->vdata->xpcs_access) { + case XGBE_XPCS_ACCESS_V1: + return xgbe_read_mmd_regs_v1(pdata, prtad, mmd_reg); + + case XGBE_XPCS_ACCESS_V2: + default: + return xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg); + } +} + +static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) +{ + switch (pdata->vdata->xpcs_access) { + case XGBE_XPCS_ACCESS_V1: + return xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data); + + case XGBE_XPCS_ACCESS_V2: + default: + return xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data); + } +} + +static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) +{ + unsigned int mdio_sca, mdio_sccd; + + reinit_completion(&pdata->mdio_complete); + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); + XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); + + if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) { + netdev_err(pdata->netdev, "mdio write operation timed out\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg) +{ + unsigned int mdio_sca, mdio_sccd; + + reinit_completion(&pdata->mdio_complete); + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); + XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); + + if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) { + netdev_err(pdata->netdev, "mdio read operation timed out\n"); + return -ETIMEDOUT; + } + + return XGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA); +} + +static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, + enum xgbe_mdio_mode mode) +{ + unsigned int reg_val = 0; + + switch (mode) { + case XGBE_MDIO_MODE_CL22: + if (port > XGMAC_MAX_C22_PORT) + return -EINVAL; + reg_val |= (1 << port); + break; + case XGBE_MDIO_MODE_CL45: + break; + default: + return -EINVAL; + } + + XGMAC_IOWRITE(pdata, MAC_MDIOCL22R, reg_val); + + return 0; +} + static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc) { return !XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN); @@ -1264,14 +1532,21 @@ static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata) static u64 xgbe_get_tx_tstamp(struct xgbe_prv_data *pdata) { - unsigned int tx_snr; + unsigned int tx_snr, tx_ssr; u64 nsec; - tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + if (pdata->vdata->tx_tstamp_workaround) { + tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR); + } else { + tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR); + tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + } + if (XGMAC_GET_BITS(tx_snr, MAC_TXSNR, TXTSSTSMIS)) return 0; - nsec = XGMAC_IOREAD(pdata, MAC_TXSSR); + nsec = tx_ssr; nsec *= NSEC_PER_SEC; nsec += tx_snr; @@ -1327,106 +1602,6 @@ static int xgbe_config_tstamp(struct xgbe_prv_data *pdata, return 0; } -static void xgbe_config_tc(struct xgbe_prv_data *pdata) -{ - unsigned int offset, queue, prio; - u8 i; - - netdev_reset_tc(pdata->netdev); - if (!pdata->num_tcs) - return; - - netdev_set_num_tc(pdata->netdev, pdata->num_tcs); - - for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) { - while ((queue < pdata->tx_q_count) && - (pdata->q2tc_map[queue] == i)) - queue++; - - netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n", - i, offset, queue - 1); - netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset); - offset = queue; - } - - if (!pdata->ets) - return; - - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) - netdev_set_prio_tc_map(pdata->netdev, prio, - pdata->ets->prio_tc[prio]); -} - -static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) -{ - struct ieee_ets *ets = pdata->ets; - unsigned int total_weight, min_weight, weight; - unsigned int mask, reg, reg_val; - unsigned int i, prio; - - if (!ets) - return; - - /* Set Tx to deficit weighted round robin scheduling algorithm (when - * traffic class is using ETS algorithm) - */ - XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR); - - /* Set Traffic Class algorithms */ - total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt; - min_weight = total_weight / 100; - if (!min_weight) - min_weight = 1; - - for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { - /* Map the priorities to the traffic class */ - mask = 0; - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { - if (ets->prio_tc[prio] == i) - mask |= (1 << prio); - } - mask &= 0xff; - - netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n", - i, mask); - reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG)); - reg_val = XGMAC_IOREAD(pdata, reg); - - reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3)); - reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3)); - - XGMAC_IOWRITE(pdata, reg, reg_val); - - /* Set the traffic class algorithm */ - switch (ets->tc_tsa[i]) { - case IEEE_8021QAZ_TSA_STRICT: - netif_dbg(pdata, drv, pdata->netdev, - "TC%u using SP\n", i); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, - MTL_TSA_SP); - break; - case IEEE_8021QAZ_TSA_ETS: - weight = total_weight * ets->tc_tx_bw[i] / 100; - weight = clamp(weight, min_weight, total_weight); - - netif_dbg(pdata, drv, pdata->netdev, - "TC%u using DWRR (weight %u)\n", i, weight); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, - MTL_TSA_ETS); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, - weight); - break; - } - } - - xgbe_config_tc(pdata); -} - -static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) -{ - xgbe_config_flow_control(pdata); -} - static void xgbe_tx_start_xmit(struct xgbe_channel *channel, struct xgbe_ring *ring) { @@ -1901,7 +2076,7 @@ static int xgbe_disable_int(struct xgbe_channel *channel, return 0; } -static int xgbe_exit(struct xgbe_prv_data *pdata) +static int __xgbe_exit(struct xgbe_prv_data *pdata) { unsigned int count = 2000; @@ -1923,6 +2098,20 @@ static int xgbe_exit(struct xgbe_prv_data *pdata) return 0; } +static int xgbe_exit(struct xgbe_prv_data *pdata) +{ + int ret; + + /* To guard against possible incorrectly generated interrupts, + * issue the software reset twice. + */ + ret = __xgbe_exit(pdata); + if (ret) + return ret; + + return __xgbe_exit(pdata); +} + static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata) { unsigned int i, count; @@ -2000,61 +2189,331 @@ static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP); } -static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size, - unsigned int queue_count) +static void xgbe_queue_flow_control_threshold(struct xgbe_prv_data *pdata, + unsigned int queue, + unsigned int q_fifo_size) +{ + unsigned int frame_fifo_size; + unsigned int rfa, rfd; + + frame_fifo_size = XGMAC_FLOW_CONTROL_ALIGN(xgbe_get_max_frame(pdata)); + + if (pdata->pfcq[queue] && (q_fifo_size > pdata->pfc_rfa)) { + /* PFC is active for this queue */ + rfa = pdata->pfc_rfa; + rfd = rfa + frame_fifo_size; + if (rfd > XGMAC_FLOW_CONTROL_MAX) + rfd = XGMAC_FLOW_CONTROL_MAX; + if (rfa >= XGMAC_FLOW_CONTROL_MAX) + rfa = XGMAC_FLOW_CONTROL_MAX - XGMAC_FLOW_CONTROL_UNIT; + } else { + /* This path deals with just maximum frame sizes which are + * limited to a jumbo frame of 9,000 (plus headers, etc.) + * so we can never exceed the maximum allowable RFA/RFD + * values. + */ + if (q_fifo_size <= 2048) { + /* rx_rfd to zero to signal no flow control */ + pdata->rx_rfa[queue] = 0; + pdata->rx_rfd[queue] = 0; + return; + } + + if (q_fifo_size <= 4096) { + /* Between 2048 and 4096 */ + pdata->rx_rfa[queue] = 0; /* Full - 1024 bytes */ + pdata->rx_rfd[queue] = 1; /* Full - 1536 bytes */ + return; + } + + if (q_fifo_size <= frame_fifo_size) { + /* Between 4096 and max-frame */ + pdata->rx_rfa[queue] = 2; /* Full - 2048 bytes */ + pdata->rx_rfd[queue] = 5; /* Full - 3584 bytes */ + return; + } + + if (q_fifo_size <= (frame_fifo_size * 3)) { + /* Between max-frame and 3 max-frames, + * trigger if we get just over a frame of data and + * resume when we have just under half a frame left. + */ + rfa = q_fifo_size - frame_fifo_size; + rfd = rfa + (frame_fifo_size / 2); + } else { + /* Above 3 max-frames - trigger when just over + * 2 frames of space available + */ + rfa = frame_fifo_size * 2; + rfa += XGMAC_FLOW_CONTROL_UNIT; + rfd = rfa + frame_fifo_size; + } + } + + pdata->rx_rfa[queue] = XGMAC_FLOW_CONTROL_VALUE(rfa); + pdata->rx_rfd[queue] = XGMAC_FLOW_CONTROL_VALUE(rfd); +} + +static void xgbe_calculate_flow_control_threshold(struct xgbe_prv_data *pdata, + unsigned int *fifo) { unsigned int q_fifo_size; - unsigned int p_fifo; + unsigned int i; - /* Calculate the configured fifo size */ - q_fifo_size = 1 << (fifo_size + 7); + for (i = 0; i < pdata->rx_q_count; i++) { + q_fifo_size = (fifo[i] + 1) * XGMAC_FIFO_UNIT; + xgbe_queue_flow_control_threshold(pdata, i, q_fifo_size); + } +} + +static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->rx_q_count; i++) { + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, + pdata->rx_rfa[i]); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, + pdata->rx_rfd[i]); + } +} + +static unsigned int xgbe_get_tx_fifo_size(struct xgbe_prv_data *pdata) +{ /* The configured value may not be the actual amount of fifo RAM */ - q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size); + return min_t(unsigned int, pdata->tx_max_fifo_size, + pdata->hw_feat.tx_fifo_size); +} - q_fifo_size = q_fifo_size / queue_count; +static unsigned int xgbe_get_rx_fifo_size(struct xgbe_prv_data *pdata) +{ + /* The configured value may not be the actual amount of fifo RAM */ + return min_t(unsigned int, pdata->rx_max_fifo_size, + pdata->hw_feat.rx_fifo_size); +} - /* Each increment in the queue fifo size represents 256 bytes of - * fifo, with 0 representing 256 bytes. Distribute the fifo equally - * between the queues. +static void xgbe_calculate_equal_fifo(unsigned int fifo_size, + unsigned int queue_count, + unsigned int *fifo) +{ + unsigned int q_fifo_size; + unsigned int p_fifo; + unsigned int i; + + q_fifo_size = fifo_size / queue_count; + + /* Calculate the fifo setting by dividing the queue's fifo size + * by the fifo allocation increment (with 0 representing the + * base allocation increment so decrement the result by 1). */ - p_fifo = q_fifo_size / 256; + p_fifo = q_fifo_size / XGMAC_FIFO_UNIT; if (p_fifo) p_fifo--; - return p_fifo; + /* Distribute the fifo equally amongst the queues */ + for (i = 0; i < queue_count; i++) + fifo[i] = p_fifo; +} + +static unsigned int xgbe_set_nonprio_fifos(unsigned int fifo_size, + unsigned int queue_count, + unsigned int *fifo) +{ + unsigned int i; + + BUILD_BUG_ON_NOT_POWER_OF_2(XGMAC_FIFO_MIN_ALLOC); + + if (queue_count <= IEEE_8021QAZ_MAX_TCS) + return fifo_size; + + /* Rx queues 9 and up are for specialized packets, + * such as PTP or DCB control packets, etc. and + * don't require a large fifo + */ + for (i = IEEE_8021QAZ_MAX_TCS; i < queue_count; i++) { + fifo[i] = (XGMAC_FIFO_MIN_ALLOC / XGMAC_FIFO_UNIT) - 1; + fifo_size -= XGMAC_FIFO_MIN_ALLOC; + } + + return fifo_size; +} + +static unsigned int xgbe_get_pfc_delay(struct xgbe_prv_data *pdata) +{ + unsigned int delay; + + /* If a delay has been provided, use that */ + if (pdata->pfc->delay) + return pdata->pfc->delay / 8; + + /* Allow for two maximum size frames */ + delay = xgbe_get_max_frame(pdata); + delay += XGMAC_ETH_PREAMBLE; + delay *= 2; + + /* Allow for PFC frame */ + delay += XGMAC_PFC_DATA_LEN; + delay += ETH_HLEN + ETH_FCS_LEN; + delay += XGMAC_ETH_PREAMBLE; + + /* Allow for miscellaneous delays (LPI exit, cable, etc.) */ + delay += XGMAC_PFC_DELAYS; + + return delay; +} + +static unsigned int xgbe_get_pfc_queues(struct xgbe_prv_data *pdata) +{ + unsigned int count, prio_queues; + unsigned int i; + + if (!pdata->pfc->pfc_en) + return 0; + + count = 0; + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + for (i = 0; i < prio_queues; i++) { + if (!xgbe_is_pfc_queue(pdata, i)) + continue; + + pdata->pfcq[i] = 1; + count++; + } + + return count; +} + +static void xgbe_calculate_dcb_fifo(struct xgbe_prv_data *pdata, + unsigned int fifo_size, + unsigned int *fifo) +{ + unsigned int q_fifo_size, rem_fifo, addn_fifo; + unsigned int prio_queues; + unsigned int pfc_count; + unsigned int i; + + q_fifo_size = XGMAC_FIFO_ALIGN(xgbe_get_max_frame(pdata)); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + pfc_count = xgbe_get_pfc_queues(pdata); + + if (!pfc_count || ((q_fifo_size * prio_queues) > fifo_size)) { + /* No traffic classes with PFC enabled or can't do lossless */ + xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo); + return; + } + + /* Calculate how much fifo we have to play with */ + rem_fifo = fifo_size - (q_fifo_size * prio_queues); + + /* Calculate how much more than base fifo PFC needs, which also + * becomes the threshold activation point (RFA) + */ + pdata->pfc_rfa = xgbe_get_pfc_delay(pdata); + pdata->pfc_rfa = XGMAC_FLOW_CONTROL_ALIGN(pdata->pfc_rfa); + + if (pdata->pfc_rfa > q_fifo_size) { + addn_fifo = pdata->pfc_rfa - q_fifo_size; + addn_fifo = XGMAC_FIFO_ALIGN(addn_fifo); + } else { + addn_fifo = 0; + } + + /* Calculate DCB fifo settings: + * - distribute remaining fifo between the VLAN priority + * queues based on traffic class PFC enablement and overall + * priority (0 is lowest priority, so start at highest) + */ + i = prio_queues; + while (i > 0) { + i--; + + fifo[i] = (q_fifo_size / XGMAC_FIFO_UNIT) - 1; + + if (!pdata->pfcq[i] || !addn_fifo) + continue; + + if (addn_fifo > rem_fifo) { + netdev_warn(pdata->netdev, + "RXq%u cannot set needed fifo size\n", i); + if (!rem_fifo) + continue; + + addn_fifo = rem_fifo; + } + + fifo[i] += (addn_fifo / XGMAC_FIFO_UNIT); + rem_fifo -= addn_fifo; + } + + if (rem_fifo) { + unsigned int inc_fifo = rem_fifo / prio_queues; + + /* Distribute remaining fifo across queues */ + for (i = 0; i < prio_queues; i++) + fifo[i] += (inc_fifo / XGMAC_FIFO_UNIT); + } } static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; + unsigned int fifo[XGBE_MAX_QUEUES]; unsigned int i; - fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size, - pdata->tx_q_count); + fifo_size = xgbe_get_tx_fifo_size(pdata); + + xgbe_calculate_equal_fifo(fifo_size, pdata->tx_q_count, fifo); for (i = 0; i < pdata->tx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo_size); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo[i]); netif_info(pdata, drv, pdata->netdev, "%d Tx hardware queues, %d byte fifo per queue\n", - pdata->tx_q_count, ((fifo_size + 1) * 256)); + pdata->tx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); } static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; + unsigned int fifo[XGBE_MAX_QUEUES]; + unsigned int prio_queues; unsigned int i; - fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size, - pdata->rx_q_count); + /* Clear any DCB related fifo/queue information */ + memset(pdata->pfcq, 0, sizeof(pdata->pfcq)); + pdata->pfc_rfa = 0; + + fifo_size = xgbe_get_rx_fifo_size(pdata); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + + /* Assign a minimum fifo to the non-VLAN priority queues */ + fifo_size = xgbe_set_nonprio_fifos(fifo_size, pdata->rx_q_count, fifo); + + if (pdata->pfc && pdata->ets) + xgbe_calculate_dcb_fifo(pdata, fifo_size, fifo); + else + xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo); for (i = 0; i < pdata->rx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo_size); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo[i]); - netif_info(pdata, drv, pdata->netdev, - "%d Rx hardware queues, %d byte fifo per queue\n", - pdata->rx_q_count, ((fifo_size + 1) * 256)); + xgbe_calculate_flow_control_threshold(pdata, fifo); + xgbe_config_flow_control_threshold(pdata); + + if (pdata->pfc && pdata->ets && pdata->pfc->pfc_en) { + netif_info(pdata, drv, pdata->netdev, + "%u Rx hardware queues\n", pdata->rx_q_count); + for (i = 0; i < pdata->rx_q_count; i++) + netif_info(pdata, drv, pdata->netdev, + "RxQ%u, %u byte fifo queue\n", i, + ((fifo[i] + 1) * XGMAC_FIFO_UNIT)); + } else { + netif_info(pdata, drv, pdata->netdev, + "%u Rx hardware queues, %u byte fifo per queue\n", + pdata->rx_q_count, + ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); + } } static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) @@ -2090,8 +2549,7 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) } /* Map the 8 VLAN priority values to available MTL Rx queues */ - prio_queues = min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, - pdata->rx_q_count); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); ppq = IEEE_8021QAZ_MAX_TCS / prio_queues; ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues; @@ -2139,16 +2597,120 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) } } -static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) +static void xgbe_config_tc(struct xgbe_prv_data *pdata) { - unsigned int i; + unsigned int offset, queue, prio; + u8 i; - for (i = 0; i < pdata->rx_q_count; i++) { - /* Activate flow control when less than 4k left in fifo */ - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, 2); + netdev_reset_tc(pdata->netdev); + if (!pdata->num_tcs) + return; + + netdev_set_num_tc(pdata->netdev, pdata->num_tcs); + + for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) { + while ((queue < pdata->tx_q_count) && + (pdata->q2tc_map[queue] == i)) + queue++; - /* De-activate flow control when more than 6k left in fifo */ - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, 4); + netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n", + i, offset, queue - 1); + netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset); + offset = queue; + } + + if (!pdata->ets) + return; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + netdev_set_prio_tc_map(pdata->netdev, prio, + pdata->ets->prio_tc[prio]); +} + +static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) +{ + struct ieee_ets *ets = pdata->ets; + unsigned int total_weight, min_weight, weight; + unsigned int mask, reg, reg_val; + unsigned int i, prio; + + if (!ets) + return; + + /* Set Tx to deficit weighted round robin scheduling algorithm (when + * traffic class is using ETS algorithm) + */ + XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR); + + /* Set Traffic Class algorithms */ + total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt; + min_weight = total_weight / 100; + if (!min_weight) + min_weight = 1; + + for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { + /* Map the priorities to the traffic class */ + mask = 0; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (ets->prio_tc[prio] == i) + mask |= (1 << prio); + } + mask &= 0xff; + + netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n", + i, mask); + reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG)); + reg_val = XGMAC_IOREAD(pdata, reg); + + reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3)); + reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3)); + + XGMAC_IOWRITE(pdata, reg, reg_val); + + /* Set the traffic class algorithm */ + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using SP\n", i); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, + MTL_TSA_SP); + break; + case IEEE_8021QAZ_TSA_ETS: + weight = total_weight * ets->tc_tx_bw[i] / 100; + weight = clamp(weight, min_weight, total_weight); + + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using DWRR (weight %u)\n", i, weight); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, + MTL_TSA_ETS); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, + weight); + break; + } + } + + xgbe_config_tc(pdata); +} + +static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) +{ + if (!test_bit(XGBE_DOWN, &pdata->dev_state)) { + /* Just stop the Tx queues while Rx fifo is changed */ + netif_tx_stop_all_queues(pdata->netdev); + + /* Suspend Rx so that fifo's can be adjusted */ + pdata->hw_if.disable_rx(pdata); + } + + xgbe_config_rx_fifo_size(pdata); + xgbe_config_flow_control(pdata); + + if (!test_bit(XGBE_DOWN, &pdata->dev_state)) { + /* Resume Rx */ + pdata->hw_if.enable_rx(pdata); + + /* Resume Tx queues */ + netif_tx_start_all_queues(pdata->netdev); } } @@ -2175,19 +2737,7 @@ static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata) static void xgbe_config_mac_speed(struct xgbe_prv_data *pdata) { - switch (pdata->phy_speed) { - case SPEED_10000: - xgbe_set_xgmii_speed(pdata); - break; - - case SPEED_2500: - xgbe_set_gmii_2500_speed(pdata); - break; - - case SPEED_1000: - xgbe_set_gmii_speed(pdata); - break; - } + xgbe_set_speed(pdata, pdata->phy_speed); } static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata) @@ -2223,17 +2773,33 @@ static u64 xgbe_mmc_read(struct xgbe_prv_data *pdata, unsigned int reg_lo) bool read_hi; u64 val; - switch (reg_lo) { - /* These registers are always 64 bit */ - case MMC_TXOCTETCOUNT_GB_LO: - case MMC_TXOCTETCOUNT_G_LO: - case MMC_RXOCTETCOUNT_GB_LO: - case MMC_RXOCTETCOUNT_G_LO: - read_hi = true; - break; + if (pdata->vdata->mmc_64bit) { + switch (reg_lo) { + /* These registers are always 32 bit */ + case MMC_RXRUNTERROR: + case MMC_RXJABBERERROR: + case MMC_RXUNDERSIZE_G: + case MMC_RXOVERSIZE_G: + case MMC_RXWATCHDOGERROR: + read_hi = false; + break; - default: - read_hi = false; + default: + read_hi = true; + } + } else { + switch (reg_lo) { + /* These registers are always 64 bit */ + case MMC_TXOCTETCOUNT_GB_LO: + case MMC_TXOCTETCOUNT_G_LO: + case MMC_RXOCTETCOUNT_GB_LO: + case MMC_RXOCTETCOUNT_G_LO: + read_hi = true; + break; + + default: + read_hi = false; + } } val = XGMAC_IOREAD(pdata, reg_lo); @@ -2563,20 +3129,48 @@ static void xgbe_config_mmc(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1); } +static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata, + unsigned int queue) +{ + unsigned int tx_status; + unsigned long tx_timeout; + + /* The Tx engine cannot be stopped if it is actively processing + * packets. Wait for the Tx queue to empty the Tx fifo. Don't + * wait forever though... + */ + tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ); + while (time_before(jiffies, tx_timeout)) { + tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR); + if ((XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TRCSTS) != 1) && + (XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TXQSTS) == 0)) + break; + + usleep_range(500, 1000); + } + + if (!time_before(jiffies, tx_timeout)) + netdev_info(pdata->netdev, + "timed out waiting for Tx queue %u to empty\n", + queue); +} + static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, - struct xgbe_channel *channel) + unsigned int queue) { unsigned int tx_dsr, tx_pos, tx_qidx; unsigned int tx_status; unsigned long tx_timeout; + if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) > 0x20) + return xgbe_txq_prepare_tx_stop(pdata, queue); + /* Calculate the status register to read and the position within */ - if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) { + if (queue < DMA_DSRX_FIRST_QUEUE) { tx_dsr = DMA_DSR0; - tx_pos = (channel->queue_index * DMA_DSR_Q_WIDTH) + - DMA_DSR0_TPS_START; + tx_pos = (queue * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START; } else { - tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE; + tx_qidx = queue - DMA_DSRX_FIRST_QUEUE; tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC); tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) + @@ -2601,7 +3195,7 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, if (!time_before(jiffies, tx_timeout)) netdev_info(pdata->netdev, "timed out waiting for Tx DMA channel %u to stop\n", - channel->queue_index); + queue); } static void xgbe_enable_tx(struct xgbe_prv_data *pdata) @@ -2633,13 +3227,8 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata) unsigned int i; /* Prepare for Tx DMA channel stop */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) - break; - - xgbe_prepare_tx_stop(pdata, channel); - } + for (i = 0; i < pdata->tx_q_count; i++) + xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); @@ -2763,13 +3352,8 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata) unsigned int i; /* Prepare for Tx DMA channel stop */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) - break; - - xgbe_prepare_tx_stop(pdata, channel); - } + for (i = 0; i < pdata->tx_q_count; i++) + xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); @@ -2856,12 +3440,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_rx_threshold(pdata, pdata->rx_threshold); xgbe_config_tx_fifo_size(pdata); xgbe_config_rx_fifo_size(pdata); - xgbe_config_flow_control_threshold(pdata); /*TODO: Error Packet and undersized good Packet forwarding enable (FEP and FUP) */ xgbe_config_dcb_tc(pdata); - xgbe_config_dcb_pfc(pdata); xgbe_enable_mtl_interrupts(pdata); /* @@ -2877,6 +3459,11 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_mmc(pdata); xgbe_enable_mac_interrupts(pdata); + /* + * Initialize ECC related features + */ + xgbe_enable_ecc_interrupts(pdata); + DBGPR("<--xgbe_init\n"); return 0; @@ -2903,9 +3490,14 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->read_mmd_regs = xgbe_read_mmd_regs; hw_if->write_mmd_regs = xgbe_write_mmd_regs; - hw_if->set_gmii_speed = xgbe_set_gmii_speed; - hw_if->set_gmii_2500_speed = xgbe_set_gmii_2500_speed; - hw_if->set_xgmii_speed = xgbe_set_xgmii_speed; + hw_if->set_speed = xgbe_set_speed; + + hw_if->set_ext_mii_mode = xgbe_set_ext_mii_mode; + hw_if->read_ext_mii_regs = xgbe_read_ext_mii_regs; + hw_if->write_ext_mii_regs = xgbe_write_ext_mii_regs; + + hw_if->set_gpio = xgbe_set_gpio; + hw_if->clr_gpio = xgbe_clr_gpio; hw_if->enable_tx = xgbe_enable_tx; hw_if->disable_tx = xgbe_disable_tx; @@ -2984,5 +3576,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->set_rss_hash_key = xgbe_set_rss_hash_key; hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table; + /* For ECC */ + hw_if->disable_ecc_ded = xgbe_disable_ecc_ded; + hw_if->disable_ecc_sec = xgbe_disable_ecc_sec; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7f9216db026f..155190db682d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -114,7 +114,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/platform_device.h> +#include <linux/module.h> #include <linux/spinlock.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -127,8 +127,35 @@ #include "xgbe.h" #include "xgbe-common.h" +static unsigned int ecc_sec_info_threshold = 10; +static unsigned int ecc_sec_warn_threshold = 10000; +static unsigned int ecc_sec_period = 600; +static unsigned int ecc_ded_threshold = 2; +static unsigned int ecc_ded_period = 600; + +#ifdef CONFIG_AMD_XGBE_HAVE_ECC +/* Only expose the ECC parameters if supported */ +module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_info_threshold, + " ECC corrected error informational threshold setting"); + +module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_warn_threshold, + " ECC corrected error warning threshold setting"); + +module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)"); + +module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting"); + +module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)"); +#endif + static int xgbe_one_poll(struct napi_struct *, int); static int xgbe_all_poll(struct napi_struct *, int); +static void xgbe_stop(struct xgbe_prv_data *); static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) { @@ -160,18 +187,8 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE + (DMA_CH_INC * i); - if (pdata->per_channel_irq) { - /* Get the DMA interrupt (offset 1) */ - ret = platform_get_irq(pdata->pdev, i + 1); - if (ret < 0) { - netdev_err(pdata->netdev, - "platform_get_irq %u failed\n", - i + 1); - goto err_irq; - } - - channel->dma_irq = ret; - } + if (pdata->per_channel_irq) + channel->dma_irq = pdata->channel_irq[i]; if (i < pdata->tx_ring_count) { spin_lock_init(&tx_ring->lock); @@ -194,9 +211,6 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) return 0; -err_irq: - kfree(rx_ring); - err_rx_ring: kfree(tx_ring); @@ -257,11 +271,6 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu) { unsigned int rx_buf_size; - if (mtu > XGMAC_JUMBO_PACKET_MTU) { - netdev_alert(netdev, "MTU exceeds maximum supported value\n"); - return -EINVAL; - } - rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; rx_buf_size = clamp_val(rx_buf_size, XGBE_RX_MIN_BUF_SIZE, PAGE_SIZE); @@ -271,48 +280,161 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu) return rx_buf_size; } -static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata) +static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata, + struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; - struct xgbe_channel *channel; enum xgbe_int int_id; + + if (channel->tx_ring && channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI_RI; + else if (channel->tx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI; + else if (channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_RI; + else + return; + + hw_if->enable_int(channel, int_id); +} + +static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; unsigned int i; channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (channel->tx_ring && channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI_RI; - else if (channel->tx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI; - else if (channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_RI; - else - continue; + for (i = 0; i < pdata->channel_count; i++, channel++) + xgbe_enable_rx_tx_int(pdata, channel); +} - hw_if->enable_int(channel, int_id); - } +static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, + struct xgbe_channel *channel) +{ + struct xgbe_hw_if *hw_if = &pdata->hw_if; + enum xgbe_int int_id; + + if (channel->tx_ring && channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI_RI; + else if (channel->tx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI; + else if (channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_RI; + else + return; + + hw_if->disable_int(channel, int_id); } static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata) { - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; - enum xgbe_int int_id; unsigned int i; channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (channel->tx_ring && channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI_RI; - else if (channel->tx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI; - else if (channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_RI; - else - continue; + for (i = 0; i < pdata->channel_count; i++, channel++) + xgbe_disable_rx_tx_int(pdata, channel); +} + +static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period, + unsigned int *count, const char *area) +{ + if (time_before(jiffies, *period)) { + (*count)++; + } else { + *period = jiffies + (ecc_sec_period * HZ); + *count = 1; + } - hw_if->disable_int(channel, int_id); + if (*count > ecc_sec_info_threshold) + dev_warn_once(pdata->dev, + "%s ECC corrected errors exceed informational threshold\n", + area); + + if (*count > ecc_sec_warn_threshold) { + dev_warn_once(pdata->dev, + "%s ECC corrected errors exceed warning threshold\n", + area); + return true; } + + return false; +} + +static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, + unsigned int *count, const char *area) +{ + if (time_before(jiffies, *period)) { + (*count)++; + } else { + *period = jiffies + (ecc_ded_period * HZ); + *count = 1; + } + + if (*count > ecc_ded_threshold) { + netdev_alert(pdata->netdev, + "%s ECC detected errors exceed threshold\n", + area); + return true; + } + + return false; +} + +static irqreturn_t xgbe_ecc_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = data; + unsigned int ecc_isr; + bool stop = false; + + /* Mask status with only the interrupts we care about */ + ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR); + ecc_isr &= XP_IOREAD(pdata, XP_ECC_IER); + netif_dbg(pdata, intr, pdata->netdev, "ECC_ISR=%#010x\n", ecc_isr); + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->tx_ded_period, + &pdata->tx_ded_count, "TX fifo"); + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->rx_ded_period, + &pdata->rx_ded_count, "RX fifo"); + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->desc_ded_period, + &pdata->desc_ded_count, + "descriptor cache"); + } + + if (stop) { + pdata->hw_if.disable_ecc_ded(pdata); + schedule_work(&pdata->stopdev_work); + goto out; + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_SEC)) { + if (xgbe_ecc_sec(pdata, &pdata->tx_sec_period, + &pdata->tx_sec_count, "TX fifo")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_TX); + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_SEC)) + if (xgbe_ecc_sec(pdata, &pdata->rx_sec_period, + &pdata->rx_sec_count, "RX fifo")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_RX); + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_SEC)) + if (xgbe_ecc_sec(pdata, &pdata->desc_sec_period, + &pdata->desc_sec_count, "descriptor cache")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_DESC); + +out: + /* Clear all ECC interrupts */ + XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr); + + return IRQ_HANDLED; } static irqreturn_t xgbe_isr(int irq, void *data) @@ -321,7 +443,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; - unsigned int mac_isr, mac_tssr; + unsigned int mac_isr, mac_tssr, mac_mdioisr; unsigned int i; /* The DMA interrupt status register also reports MAC and MTL @@ -358,6 +480,13 @@ static irqreturn_t xgbe_isr(int irq, void *data) /* Turn on polling */ __napi_schedule_irqoff(&pdata->napi); } + } else { + /* Don't clear Rx/Tx status if doing per channel DMA + * interrupts, these will be cleared by the ISR for + * per channel DMA interrupts. + */ + XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0); + XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0); } if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU)) @@ -367,13 +496,16 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE)) schedule_work(&pdata->restart_work); - /* Clear all interrupt signals */ + /* Clear interrupt signals */ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr); } if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) { mac_isr = XGMAC_IOREAD(pdata, MAC_ISR); + netif_dbg(pdata, intr, pdata->netdev, "MAC_ISR=%#010x\n", + mac_isr); + if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS)) hw_if->tx_mmc_int(pdata); @@ -383,6 +515,9 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (XGMAC_GET_BITS(mac_isr, MAC_ISR, TSIS)) { mac_tssr = XGMAC_IOREAD(pdata, MAC_TSSR); + netif_dbg(pdata, intr, pdata->netdev, + "MAC_TSSR=%#010x\n", mac_tssr); + if (XGMAC_GET_BITS(mac_tssr, MAC_TSSR, TXTSC)) { /* Read Tx Timestamp to clear interrupt */ pdata->tx_tstamp = @@ -391,8 +526,31 @@ static irqreturn_t xgbe_isr(int irq, void *data) &pdata->tx_tstamp_work); } } + + if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) { + mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR); + + netif_dbg(pdata, intr, pdata->netdev, + "MAC_MDIOISR=%#010x\n", mac_mdioisr); + + if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR, + SNGLCOMPINT)) + complete(&pdata->mdio_complete); + } } + /* If there is not a separate AN irq, handle it here */ + if (pdata->dev_irq == pdata->an_irq) + pdata->phy_if.an_isr(irq, pdata); + + /* If there is not a separate ECC irq, handle it here */ + if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) + xgbe_ecc_isr(irq, pdata); + + /* If there is not a separate I2C irq, handle it here */ + if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) + pdata->i2c_if.i2c_isr(irq, pdata); + isr_done: return IRQ_HANDLED; } @@ -400,18 +558,29 @@ isr_done: static irqreturn_t xgbe_dma_isr(int irq, void *data) { struct xgbe_channel *channel = data; + struct xgbe_prv_data *pdata = channel->pdata; + unsigned int dma_status; /* Per channel DMA interrupts are enabled, so we use the per * channel napi structure and not the private data napi structure */ if (napi_schedule_prep(&channel->napi)) { /* Disable Tx and Rx interrupts */ - disable_irq_nosync(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_disable_rx_tx_int(pdata, channel); + else + disable_irq_nosync(channel->dma_irq); /* Turn on polling */ __napi_schedule_irqoff(&channel->napi); } + /* Clear Tx/Rx signals */ + dma_status = 0; + XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1); + XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1); + XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status); + return IRQ_HANDLED; } @@ -428,7 +597,10 @@ static void xgbe_tx_timer(unsigned long data) if (napi_schedule_prep(napi)) { /* Disable Tx and Rx interrupts */ if (pdata->per_channel_irq) - disable_irq_nosync(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_disable_rx_tx_int(pdata, channel); + else + disable_irq_nosync(channel->dma_irq); else xgbe_disable_rx_tx_ints(pdata); @@ -595,6 +767,10 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) hw_feat->tx_ch_cnt++; hw_feat->tc_cnt++; + /* Translate the fifo sizes into actual numbers */ + hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7); + hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7); + DBGPR("<--xgbe_get_all_hw_features\n"); } @@ -657,6 +833,16 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) return ret; } + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) { + ret = devm_request_irq(pdata->dev, pdata->ecc_irq, xgbe_ecc_isr, + 0, pdata->ecc_name, pdata); + if (ret) { + netdev_alert(netdev, "error requesting ecc irq %d\n", + pdata->ecc_irq); + goto err_dev_irq; + } + } + if (!pdata->per_channel_irq) return 0; @@ -673,17 +859,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) if (ret) { netdev_alert(netdev, "error requesting irq %d\n", channel->dma_irq); - goto err_irq; + goto err_dma_irq; } } return 0; -err_irq: +err_dma_irq: /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ for (i--, channel--; i < pdata->channel_count; i--, channel--) devm_free_irq(pdata->dev, channel->dma_irq, channel); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + +err_dev_irq: devm_free_irq(pdata->dev, pdata->dev_irq, pdata); return ret; @@ -696,6 +886,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + if (!pdata->per_channel_irq) return; @@ -783,7 +976,7 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) DBGPR("<--xgbe_free_rx_data\n"); } -static int xgbe_phy_init(struct xgbe_prv_data *pdata) +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; @@ -879,16 +1072,16 @@ static int xgbe_start(struct xgbe_prv_data *pdata) hw_if->init(pdata); - ret = phy_if->phy_start(pdata); - if (ret) - goto err_phy; - xgbe_napi_enable(pdata, 1); ret = xgbe_request_irqs(pdata); if (ret) goto err_napi; + ret = phy_if->phy_start(pdata); + if (ret) + goto err_irqs; + hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); @@ -897,16 +1090,18 @@ static int xgbe_start(struct xgbe_prv_data *pdata) xgbe_start_timers(pdata); queue_work(pdata->dev_workqueue, &pdata->service_work); + clear_bit(XGBE_STOPPED, &pdata->dev_state); + DBGPR("<--xgbe_start\n"); return 0; +err_irqs: + xgbe_free_irqs(pdata); + err_napi: xgbe_napi_disable(pdata, 1); - phy_if->phy_stop(pdata); - -err_phy: hw_if->exit(pdata); return ret; @@ -923,6 +1118,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_stop\n"); + if (test_bit(XGBE_STOPPED, &pdata->dev_state)) + return; + netif_tx_stop_all_queues(netdev); xgbe_stop_timers(pdata); @@ -948,9 +1146,29 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) netdev_tx_reset_queue(txq); } + set_bit(XGBE_STOPPED, &pdata->dev_state); + DBGPR("<--xgbe_stop\n"); } +static void xgbe_stopdev(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + stopdev_work); + + rtnl_lock(); + + xgbe_stop(pdata); + + xgbe_free_tx_data(pdata); + xgbe_free_rx_data(pdata); + + rtnl_unlock(); + + netdev_alert(pdata->netdev, "device stopped\n"); +} + static void xgbe_restart_dev(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_restart_dev\n"); @@ -1297,8 +1515,8 @@ static int xgbe_open(struct net_device *netdev) DBGPR("-->xgbe_open\n"); - /* Initialize the phy */ - ret = xgbe_phy_init(pdata); + /* Reset the phy settings */ + ret = xgbe_phy_reset(pdata); if (ret) return ret; @@ -1333,6 +1551,7 @@ static int xgbe_open(struct net_device *netdev) INIT_WORK(&pdata->service_work, xgbe_service); INIT_WORK(&pdata->restart_work, xgbe_restart); + INIT_WORK(&pdata->stopdev_work, xgbe_stopdev); INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); xgbe_init_timers(pdata); @@ -2041,6 +2260,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) { struct xgbe_channel *channel = container_of(napi, struct xgbe_channel, napi); + struct xgbe_prv_data *pdata = channel->pdata; int processed = 0; DBGPR("-->xgbe_one_poll: budget=%d\n", budget); @@ -2057,7 +2277,10 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) napi_complete_done(napi, processed); /* Enable Tx and Rx interrupts */ - enable_irq(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_enable_rx_tx_int(pdata, channel); + else + enable_irq(channel->dma_irq); } DBGPR("<--xgbe_one_poll: received = %d\n", processed); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 4007b429c80c..920566a3a599 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -272,97 +272,86 @@ static int xgbe_set_pauseparam(struct net_device *netdev, return ret; } -static int xgbe_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int xgbe_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - cmd->phy_address = pdata->phy.address; + cmd->base.phy_address = pdata->phy.address; - cmd->supported = pdata->phy.supported; - cmd->advertising = pdata->phy.advertising; - cmd->lp_advertising = pdata->phy.lp_advertising; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + pdata->phy.supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + pdata->phy.advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + pdata->phy.lp_advertising); - cmd->autoneg = pdata->phy.autoneg; - ethtool_cmd_speed_set(cmd, pdata->phy.speed); - cmd->duplex = pdata->phy.duplex; + cmd->base.autoneg = pdata->phy.autoneg; + cmd->base.speed = pdata->phy.speed; + cmd->base.duplex = pdata->phy.duplex; - cmd->port = PORT_NONE; - cmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_NONE; return 0; } -static int xgbe_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int xgbe_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); + u32 advertising; u32 speed; int ret; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; - if (cmd->phy_address != pdata->phy.address) { + if (cmd->base.phy_address != pdata->phy.address) { netdev_err(netdev, "invalid phy address %hhu\n", - cmd->phy_address); + cmd->base.phy_address); return -EINVAL; } - if ((cmd->autoneg != AUTONEG_ENABLE) && - (cmd->autoneg != AUTONEG_DISABLE)) { + if ((cmd->base.autoneg != AUTONEG_ENABLE) && + (cmd->base.autoneg != AUTONEG_DISABLE)) { netdev_err(netdev, "unsupported autoneg %hhu\n", - cmd->autoneg); + cmd->base.autoneg); return -EINVAL; } - if (cmd->autoneg == AUTONEG_DISABLE) { - switch (speed) { - case SPEED_10000: - break; - case SPEED_2500: - if (pdata->speed_set != XGBE_SPEEDSET_2500_10000) { - netdev_err(netdev, "unsupported speed %u\n", - speed); - return -EINVAL; - } - break; - case SPEED_1000: - if (pdata->speed_set != XGBE_SPEEDSET_1000_10000) { - netdev_err(netdev, "unsupported speed %u\n", - speed); - return -EINVAL; - } - break; - default: + if (cmd->base.autoneg == AUTONEG_DISABLE) { + if (!pdata->phy_if.phy_valid_speed(pdata, speed)) { netdev_err(netdev, "unsupported speed %u\n", speed); return -EINVAL; } - if (cmd->duplex != DUPLEX_FULL) { + if (cmd->base.duplex != DUPLEX_FULL) { netdev_err(netdev, "unsupported duplex %hhu\n", - cmd->duplex); + cmd->base.duplex); return -EINVAL; } } + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + netif_dbg(pdata, link, netdev, "requested advertisement %#x, phy supported %#x\n", - cmd->advertising, pdata->phy.supported); + advertising, pdata->phy.supported); - cmd->advertising &= pdata->phy.supported; - if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) { + advertising &= pdata->phy.supported; + if ((cmd->base.autoneg == AUTONEG_ENABLE) && !advertising) { netdev_err(netdev, "unsupported requested advertisement\n"); return -EINVAL; } ret = 0; - pdata->phy.autoneg = cmd->autoneg; + pdata->phy.autoneg = cmd->base.autoneg; pdata->phy.speed = speed; - pdata->phy.duplex = cmd->duplex; - pdata->phy.advertising = cmd->advertising; + pdata->phy.duplex = cmd->base.duplex; + pdata->phy.advertising = advertising; - if (cmd->autoneg == AUTONEG_ENABLE) + if (cmd->base.autoneg == AUTONEG_ENABLE) pdata->phy.advertising |= ADVERTISED_Autoneg; else pdata->phy.advertising &= ~ADVERTISED_Autoneg; @@ -602,8 +591,6 @@ static int xgbe_get_ts_info(struct net_device *netdev, } static const struct ethtool_ops xgbe_ethtool_ops = { - .get_settings = xgbe_get_settings, - .set_settings = xgbe_set_settings, .get_drvinfo = xgbe_get_drvinfo, .get_msglevel = xgbe_get_msglevel, .set_msglevel = xgbe_set_msglevel, @@ -621,6 +608,8 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .get_rxfh = xgbe_get_rxfh, .set_rxfh = xgbe_set_rxfh, .get_ts_info = xgbe_get_ts_info, + .get_link_ksettings = xgbe_get_link_ksettings, + .set_link_ksettings = xgbe_set_link_ksettings, }; const struct ethtool_ops *xgbe_get_ethtool_ops(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c new file mode 100644 index 000000000000..0c7088a426e9 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -0,0 +1,492 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/delay.h> +#include <linux/completion.h> +#include <linux/mutex.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_ABORT_COUNT 500 +#define XGBE_DISABLE_COUNT 1000 + +#define XGBE_STD_SPEED 1 + +#define XGBE_INTR_RX_FULL BIT(IC_RAW_INTR_STAT_RX_FULL_INDEX) +#define XGBE_INTR_TX_EMPTY BIT(IC_RAW_INTR_STAT_TX_EMPTY_INDEX) +#define XGBE_INTR_TX_ABRT BIT(IC_RAW_INTR_STAT_TX_ABRT_INDEX) +#define XGBE_INTR_STOP_DET BIT(IC_RAW_INTR_STAT_STOP_DET_INDEX) +#define XGBE_DEFAULT_INT_MASK (XGBE_INTR_RX_FULL | \ + XGBE_INTR_TX_EMPTY | \ + XGBE_INTR_TX_ABRT | \ + XGBE_INTR_STOP_DET) + +#define XGBE_I2C_READ BIT(8) +#define XGBE_I2C_STOP BIT(9) + +static int xgbe_i2c_abort(struct xgbe_prv_data *pdata) +{ + unsigned int wait = XGBE_ABORT_COUNT; + + /* Must be enabled to recognize the abort request */ + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, 1); + + /* Issue the abort */ + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, ABORT, 1); + + while (wait--) { + if (!XI2C_IOREAD_BITS(pdata, IC_ENABLE, ABORT)) + return 0; + + usleep_range(500, 600); + } + + return -EBUSY; +} + +static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable) +{ + unsigned int wait = XGBE_DISABLE_COUNT; + unsigned int mode = enable ? 1 : 0; + + while (wait--) { + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, mode); + if (XI2C_IOREAD_BITS(pdata, IC_ENABLE_STATUS, EN) == mode) + return 0; + + usleep_range(100, 110); + } + + return -EBUSY; +} + +static int xgbe_i2c_disable(struct xgbe_prv_data *pdata) +{ + unsigned int ret; + + ret = xgbe_i2c_set_enable(pdata, false); + if (ret) { + /* Disable failed, try an abort */ + ret = xgbe_i2c_abort(pdata); + if (ret) + return ret; + + /* Abort succeeded, try to disable again */ + ret = xgbe_i2c_set_enable(pdata, false); + } + + return ret; +} + +static int xgbe_i2c_enable(struct xgbe_prv_data *pdata) +{ + return xgbe_i2c_set_enable(pdata, true); +} + +static void xgbe_i2c_clear_all_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOREAD(pdata, IC_CLR_INTR); +} + +static void xgbe_i2c_disable_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOWRITE(pdata, IC_INTR_MASK, 0); +} + +static void xgbe_i2c_enable_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOWRITE(pdata, IC_INTR_MASK, XGBE_DEFAULT_INT_MASK); +} + +static void xgbe_i2c_write(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int tx_slots; + unsigned int cmd; + + /* Configured to never receive Rx overflows, so fill up Tx fifo */ + tx_slots = pdata->i2c.tx_fifo_size - XI2C_IOREAD(pdata, IC_TXFLR); + while (tx_slots && state->tx_len) { + if (state->op->cmd == XGBE_I2C_CMD_READ) + cmd = XGBE_I2C_READ; + else + cmd = *state->tx_buf++; + + if (state->tx_len == 1) + XI2C_SET_BITS(cmd, IC_DATA_CMD, STOP, 1); + + XI2C_IOWRITE(pdata, IC_DATA_CMD, cmd); + + tx_slots--; + state->tx_len--; + } + + /* No more Tx operations, so ignore TX_EMPTY and return */ + if (!state->tx_len) + XI2C_IOWRITE_BITS(pdata, IC_INTR_MASK, TX_EMPTY, 0); +} + +static void xgbe_i2c_read(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int rx_slots; + + /* Anything to be read? */ + if (state->op->cmd != XGBE_I2C_CMD_READ) + return; + + rx_slots = XI2C_IOREAD(pdata, IC_RXFLR); + while (rx_slots && state->rx_len) { + *state->rx_buf++ = XI2C_IOREAD(pdata, IC_DATA_CMD); + state->rx_len--; + rx_slots--; + } +} + +static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, + unsigned int isr) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + + if (isr & XGBE_INTR_TX_ABRT) { + state->tx_abort_source = XI2C_IOREAD(pdata, IC_TX_ABRT_SOURCE); + XI2C_IOREAD(pdata, IC_CLR_TX_ABRT); + } + + if (isr & XGBE_INTR_STOP_DET) + XI2C_IOREAD(pdata, IC_CLR_STOP_DET); +} + +static irqreturn_t xgbe_i2c_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int isr; + + isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT); + netif_dbg(pdata, intr, pdata->netdev, + "I2C interrupt received: status=%#010x\n", isr); + + xgbe_i2c_clear_isr_interrupts(pdata, isr); + + if (isr & XGBE_INTR_TX_ABRT) { + netif_dbg(pdata, link, pdata->netdev, + "I2C TX_ABRT received (%#010x) for target %#04x\n", + state->tx_abort_source, state->op->target); + + xgbe_i2c_disable_interrupts(pdata); + + state->ret = -EIO; + goto out; + } + + /* Check for data in the Rx fifo */ + xgbe_i2c_read(pdata); + + /* Fill up the Tx fifo next */ + xgbe_i2c_write(pdata); + +out: + /* Complete on an error or STOP condition */ + if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET)) + complete(&pdata->i2c_complete); + + return IRQ_HANDLED; +} + +static void xgbe_i2c_set_mode(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XI2C_IOREAD(pdata, IC_CON); + XI2C_SET_BITS(reg, IC_CON, MASTER_MODE, 1); + XI2C_SET_BITS(reg, IC_CON, SLAVE_DISABLE, 1); + XI2C_SET_BITS(reg, IC_CON, RESTART_EN, 1); + XI2C_SET_BITS(reg, IC_CON, SPEED, XGBE_STD_SPEED); + XI2C_SET_BITS(reg, IC_CON, RX_FIFO_FULL_HOLD, 1); + XI2C_IOWRITE(pdata, IC_CON, reg); +} + +static void xgbe_i2c_get_features(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c *i2c = &pdata->i2c; + unsigned int reg; + + reg = XI2C_IOREAD(pdata, IC_COMP_PARAM_1); + i2c->max_speed_mode = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + MAX_SPEED_MODE); + i2c->rx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + RX_BUFFER_DEPTH); + i2c->tx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + TX_BUFFER_DEPTH); + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "I2C features: %s=%u, %s=%u, %s=%u\n", + "MAX_SPEED_MODE", i2c->max_speed_mode, + "RX_BUFFER_DEPTH", i2c->rx_fifo_size, + "TX_BUFFER_DEPTH", i2c->tx_fifo_size); +} + +static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) +{ + XI2C_IOWRITE(pdata, IC_TAR, addr); +} + +static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata) +{ + if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT)) + return IRQ_HANDLED; + + return xgbe_i2c_isr(irq, pdata); +} + +static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + int ret; + + mutex_lock(&pdata->i2c_mutex); + + reinit_completion(&pdata->i2c_complete); + + ret = xgbe_i2c_disable(pdata); + if (ret) { + netdev_err(pdata->netdev, "failed to disable i2c master\n"); + goto unlock; + } + + xgbe_i2c_set_target(pdata, op->target); + + memset(state, 0, sizeof(*state)); + state->op = op; + state->tx_len = op->len; + state->tx_buf = op->buf; + state->rx_len = op->len; + state->rx_buf = op->buf; + + xgbe_i2c_clear_all_interrupts(pdata); + ret = xgbe_i2c_enable(pdata); + if (ret) { + netdev_err(pdata->netdev, "failed to enable i2c master\n"); + goto unlock; + } + + /* Enabling the interrupts will cause the TX FIFO empty interrupt to + * fire and begin to process the command via the ISR. + */ + xgbe_i2c_enable_interrupts(pdata); + + if (!wait_for_completion_timeout(&pdata->i2c_complete, HZ)) { + netdev_err(pdata->netdev, "i2c operation timed out\n"); + ret = -ETIMEDOUT; + goto disable; + } + + ret = state->ret; + if (ret) { + if (state->tx_abort_source & IC_TX_ABRT_7B_ADDR_NOACK) + ret = -ENOTCONN; + else if (state->tx_abort_source & IC_TX_ABRT_ARB_LOST) + ret = -EAGAIN; + } + +disable: + xgbe_i2c_disable_interrupts(pdata); + xgbe_i2c_disable(pdata); + +unlock: + mutex_unlock(&pdata->i2c_mutex); + + return ret; +} + +static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) +{ + if (!pdata->i2c.started) + return; + + netif_dbg(pdata, link, pdata->netdev, "stopping I2C\n"); + + pdata->i2c.started = 0; + + xgbe_i2c_disable_interrupts(pdata); + xgbe_i2c_disable(pdata); + xgbe_i2c_clear_all_interrupts(pdata); + + if (pdata->dev_irq != pdata->i2c_irq) + devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); +} + +static int xgbe_i2c_start(struct xgbe_prv_data *pdata) +{ + int ret; + + if (pdata->i2c.started) + return 0; + + netif_dbg(pdata, link, pdata->netdev, "starting I2C\n"); + + /* If we have a separate I2C irq, enable it */ + if (pdata->dev_irq != pdata->i2c_irq) { + ret = devm_request_irq(pdata->dev, pdata->i2c_irq, + xgbe_i2c_isr, 0, pdata->i2c_name, + pdata); + if (ret) { + netdev_err(pdata->netdev, "i2c irq request failed\n"); + return ret; + } + } + + pdata->i2c.started = 1; + + return 0; +} + +static int xgbe_i2c_init(struct xgbe_prv_data *pdata) +{ + int ret; + + xgbe_i2c_disable_interrupts(pdata); + + ret = xgbe_i2c_disable(pdata); + if (ret) { + dev_err(pdata->dev, "failed to disable i2c master\n"); + return ret; + } + + xgbe_i2c_get_features(pdata); + + xgbe_i2c_set_mode(pdata); + + xgbe_i2c_clear_all_interrupts(pdata); + + return 0; +} + +void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *i2c_if) +{ + i2c_if->i2c_init = xgbe_i2c_init; + + i2c_if->i2c_start = xgbe_i2c_start; + i2c_if->i2c_stop = xgbe_i2c_stop; + + i2c_if->i2c_xfer = xgbe_i2c_xfer; + + i2c_if->i2c_isr = xgbe_i2c_combined_isr; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 9de078819aa6..b87a89988ffd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -116,19 +116,10 @@ #include <linux/module.h> #include <linux/device.h> -#include <linux/platform_device.h> #include <linux/spinlock.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/io.h> -#include <linux/of.h> -#include <linux/of_net.h> -#include <linux/of_address.h> -#include <linux/of_platform.h> -#include <linux/clk.h> -#include <linux/property.h> -#include <linux/acpi.h> -#include <linux/mdio.h> #include "xgbe.h" #include "xgbe-common.h" @@ -145,42 +136,6 @@ MODULE_PARM_DESC(debug, " Network interface message level setting"); static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP); -static const u32 xgbe_serdes_blwc[] = { - XGBE_SPEED_1000_BLWC, - XGBE_SPEED_2500_BLWC, - XGBE_SPEED_10000_BLWC, -}; - -static const u32 xgbe_serdes_cdr_rate[] = { - XGBE_SPEED_1000_CDR, - XGBE_SPEED_2500_CDR, - XGBE_SPEED_10000_CDR, -}; - -static const u32 xgbe_serdes_pq_skew[] = { - XGBE_SPEED_1000_PQ, - XGBE_SPEED_2500_PQ, - XGBE_SPEED_10000_PQ, -}; - -static const u32 xgbe_serdes_tx_amp[] = { - XGBE_SPEED_1000_TXAMP, - XGBE_SPEED_2500_TXAMP, - XGBE_SPEED_10000_TXAMP, -}; - -static const u32 xgbe_serdes_dfe_tap_cfg[] = { - XGBE_SPEED_1000_DFE_TAP_CONFIG, - XGBE_SPEED_2500_DFE_TAP_CONFIG, - XGBE_SPEED_10000_DFE_TAP_CONFIG, -}; - -static const u32 xgbe_serdes_dfe_tap_ena[] = { - XGBE_SPEED_1000_DFE_TAP_ENABLE, - XGBE_SPEED_2500_DFE_TAP_ENABLE, - XGBE_SPEED_10000_DFE_TAP_ENABLE, -}; - static void xgbe_default_config(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_default_config\n"); @@ -206,455 +161,124 @@ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata) { xgbe_init_function_ptrs_dev(&pdata->hw_if); xgbe_init_function_ptrs_phy(&pdata->phy_if); + xgbe_init_function_ptrs_i2c(&pdata->i2c_if); xgbe_init_function_ptrs_desc(&pdata->desc_if); -} - -#ifdef CONFIG_ACPI -static int xgbe_acpi_support(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - u32 property; - int ret; - - /* Obtain the system clock setting */ - ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property); - if (ret) { - dev_err(dev, "unable to obtain %s property\n", - XGBE_ACPI_DMA_FREQ); - return ret; - } - pdata->sysclk_rate = property; - - /* Obtain the PTP clock setting */ - ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property); - if (ret) { - dev_err(dev, "unable to obtain %s property\n", - XGBE_ACPI_PTP_FREQ); - return ret; - } - pdata->ptpclk_rate = property; - return 0; + pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if); } -#else /* CONFIG_ACPI */ -static int xgbe_acpi_support(struct xgbe_prv_data *pdata) -{ - return -EINVAL; -} -#endif /* CONFIG_ACPI */ -#ifdef CONFIG_OF -static int xgbe_of_support(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - - /* Obtain the system clock setting */ - pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK); - if (IS_ERR(pdata->sysclk)) { - dev_err(dev, "dma devm_clk_get failed\n"); - return PTR_ERR(pdata->sysclk); - } - pdata->sysclk_rate = clk_get_rate(pdata->sysclk); - - /* Obtain the PTP clock setting */ - pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK); - if (IS_ERR(pdata->ptpclk)) { - dev_err(dev, "ptp devm_clk_get failed\n"); - return PTR_ERR(pdata->ptpclk); - } - pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk); - - return 0; -} - -static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - struct device_node *phy_node; - struct platform_device *phy_pdev; - - phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0); - if (phy_node) { - /* Old style device tree: - * The XGBE and PHY resources are separate - */ - phy_pdev = of_find_device_by_node(phy_node); - of_node_put(phy_node); - } else { - /* New style device tree: - * The XGBE and PHY resources are grouped together with - * the PHY resources listed last - */ - get_device(dev); - phy_pdev = pdata->pdev; - } - - return phy_pdev; -} -#else /* CONFIG_OF */ -static int xgbe_of_support(struct xgbe_prv_data *pdata) -{ - return -EINVAL; -} - -static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - return NULL; -} -#endif /* CONFIG_OF */ - -static unsigned int xgbe_resource_count(struct platform_device *pdev, - unsigned int type) -{ - unsigned int count; - int i; - - for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if (type == resource_type(res)) - count++; - } - - return count; -} - -static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - struct platform_device *phy_pdev; - - if (pdata->use_acpi) { - get_device(pdata->dev); - phy_pdev = pdata->pdev; - } else { - phy_pdev = xgbe_of_get_phy_pdev(pdata); - } - - return phy_pdev; -} - -static int xgbe_probe(struct platform_device *pdev) +struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev) { struct xgbe_prv_data *pdata; struct net_device *netdev; - struct device *dev = &pdev->dev, *phy_dev; - struct platform_device *phy_pdev; - struct resource *res; - const char *phy_mode; - unsigned int i, phy_memnum, phy_irqnum; - enum dev_dma_attr attr; - int ret; - - DBGPR("--> xgbe_probe\n"); netdev = alloc_etherdev_mq(sizeof(struct xgbe_prv_data), XGBE_MAX_DMA_CHANNELS); if (!netdev) { - dev_err(dev, "alloc_etherdev failed\n"); - ret = -ENOMEM; - goto err_alloc; + dev_err(dev, "alloc_etherdev_mq failed\n"); + return ERR_PTR(-ENOMEM); } SET_NETDEV_DEV(netdev, dev); pdata = netdev_priv(netdev); pdata->netdev = netdev; - pdata->pdev = pdev; - pdata->adev = ACPI_COMPANION(dev); pdata->dev = dev; - platform_set_drvdata(pdev, netdev); spin_lock_init(&pdata->lock); spin_lock_init(&pdata->xpcs_lock); mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock); + mutex_init(&pdata->i2c_mutex); + init_completion(&pdata->i2c_complete); + init_completion(&pdata->mdio_complete); pdata->msg_enable = netif_msg_init(debug, default_msg_level); set_bit(XGBE_DOWN, &pdata->dev_state); + set_bit(XGBE_STOPPED, &pdata->dev_state); - /* Check if we should use ACPI or DT */ - pdata->use_acpi = dev->of_node ? 0 : 1; - - phy_pdev = xgbe_get_phy_pdev(pdata); - if (!phy_pdev) { - dev_err(dev, "unable to obtain phy device\n"); - ret = -EINVAL; - goto err_phydev; - } - phy_dev = &phy_pdev->dev; - - if (pdev == phy_pdev) { - /* New style device tree or ACPI: - * The XGBE and PHY resources are grouped together with - * the PHY resources listed last - */ - phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; - phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; - } else { - /* Old style device tree: - * The XGBE and PHY resources are separate - */ - phy_memnum = 0; - phy_irqnum = 0; - } - - /* Set and validate the number of descriptors for a ring */ - BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); - pdata->tx_desc_count = XGBE_TX_DESC_CNT; - if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) { - dev_err(dev, "tx descriptor count (%d) is not valid\n", - pdata->tx_desc_count); - ret = -EINVAL; - goto err_io; - } - BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT); - pdata->rx_desc_count = XGBE_RX_DESC_CNT; - if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) { - dev_err(dev, "rx descriptor count (%d) is not valid\n", - pdata->rx_desc_count); - ret = -EINVAL; - goto err_io; - } - - /* Obtain the mmio areas for the device */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pdata->xgmac_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->xgmac_regs)) { - dev_err(dev, "xgmac ioremap failed\n"); - ret = PTR_ERR(pdata->xgmac_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - pdata->xpcs_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->xpcs_regs)) { - dev_err(dev, "xpcs ioremap failed\n"); - ret = PTR_ERR(pdata->xpcs_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->rxtx_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->rxtx_regs)) { - dev_err(dev, "rxtx ioremap failed\n"); - ret = PTR_ERR(pdata->rxtx_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "rxtx_regs = %p\n", pdata->rxtx_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->sir0_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->sir0_regs)) { - dev_err(dev, "sir0 ioremap failed\n"); - ret = PTR_ERR(pdata->sir0_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "sir0_regs = %p\n", pdata->sir0_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->sir1_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->sir1_regs)) { - dev_err(dev, "sir1 ioremap failed\n"); - ret = PTR_ERR(pdata->sir1_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); - - /* Retrieve the MAC address */ - ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, - pdata->mac_addr, - sizeof(pdata->mac_addr)); - if (ret || !is_valid_ether_addr(pdata->mac_addr)) { - dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY); - if (!ret) - ret = -EINVAL; - goto err_io; - } - - /* Retrieve the PHY mode - it must be "xgmii" */ - ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY, - &phy_mode); - if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) { - dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY); - if (!ret) - ret = -EINVAL; - goto err_io; - } - pdata->phy_mode = PHY_INTERFACE_MODE_XGMII; - - /* Check for per channel interrupt support */ - if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) - pdata->per_channel_irq = 1; + return pdata; +} - /* Retrieve the PHY speedset */ - ret = device_property_read_u32(phy_dev, XGBE_SPEEDSET_PROPERTY, - &pdata->speed_set); - if (ret) { - dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); - goto err_io; - } +void xgbe_free_pdata(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - case XGBE_SPEEDSET_2500_10000: - break; - default: - dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); - ret = -EINVAL; - goto err_io; - } + free_netdev(netdev); +} - /* Retrieve the PHY configuration properties */ - if (device_property_present(phy_dev, XGBE_BLWC_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_BLWC_PROPERTY, - pdata->serdes_blwc, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_BLWC_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_blwc, xgbe_serdes_blwc, - sizeof(pdata->serdes_blwc)); - } +void xgbe_set_counts(struct xgbe_prv_data *pdata) +{ + /* Set all the function pointers */ + xgbe_init_all_fptrs(pdata); - if (device_property_present(phy_dev, XGBE_CDR_RATE_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_CDR_RATE_PROPERTY, - pdata->serdes_cdr_rate, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_CDR_RATE_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_cdr_rate, xgbe_serdes_cdr_rate, - sizeof(pdata->serdes_cdr_rate)); - } + /* Populate the hardware features */ + xgbe_get_all_hw_features(pdata); - if (device_property_present(phy_dev, XGBE_PQ_SKEW_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PQ_SKEW_PROPERTY, - pdata->serdes_pq_skew, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PQ_SKEW_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_pq_skew, xgbe_serdes_pq_skew, - sizeof(pdata->serdes_pq_skew)); - } + /* Set default max values if not provided */ + if (!pdata->tx_max_channel_count) + pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt; + if (!pdata->rx_max_channel_count) + pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt; - if (device_property_present(phy_dev, XGBE_TX_AMP_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_TX_AMP_PROPERTY, - pdata->serdes_tx_amp, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_TX_AMP_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_tx_amp, xgbe_serdes_tx_amp, - sizeof(pdata->serdes_tx_amp)); - } + if (!pdata->tx_max_q_count) + pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt; + if (!pdata->rx_max_q_count) + pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt; - if (device_property_present(phy_dev, XGBE_DFE_CFG_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_DFE_CFG_PROPERTY, - pdata->serdes_dfe_tap_cfg, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_DFE_CFG_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_dfe_tap_cfg, xgbe_serdes_dfe_tap_cfg, - sizeof(pdata->serdes_dfe_tap_cfg)); - } + /* Calculate the number of Tx and Rx rings to be created + * -Tx (DMA) Channels map 1-to-1 to Tx Queues so set + * the number of Tx queues to the number of Tx channels + * enabled + * -Rx (DMA) Channels do not map 1-to-1 so use the actual + * number of Rx queues or maximum allowed + */ + pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(), + pdata->hw_feat.tx_ch_cnt); + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->tx_max_channel_count); + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->tx_max_q_count); - if (device_property_present(phy_dev, XGBE_DFE_ENA_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_DFE_ENA_PROPERTY, - pdata->serdes_dfe_tap_ena, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_DFE_ENA_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_dfe_tap_ena, xgbe_serdes_dfe_tap_ena, - sizeof(pdata->serdes_dfe_tap_ena)); - } + pdata->tx_q_count = pdata->tx_ring_count; - /* Obtain device settings unique to ACPI/OF */ - if (pdata->use_acpi) - ret = xgbe_acpi_support(pdata); - else - ret = xgbe_of_support(pdata); - if (ret) - goto err_io; + pdata->rx_ring_count = min_t(unsigned int, num_online_cpus(), + pdata->hw_feat.rx_ch_cnt); + pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, + pdata->rx_max_channel_count); - /* Set the DMA coherency values */ - attr = device_get_dma_attr(dev); - if (attr == DEV_DMA_NOT_SUPPORTED) { - dev_err(dev, "DMA is not supported"); - goto err_io; - } - pdata->coherent = (attr == DEV_DMA_COHERENT); - if (pdata->coherent) { - pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; - pdata->arcache = XGBE_DMA_OS_ARCACHE; - pdata->awcache = XGBE_DMA_OS_AWCACHE; - } else { - pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN; - pdata->arcache = XGBE_DMA_SYS_ARCACHE; - pdata->awcache = XGBE_DMA_SYS_AWCACHE; - } + pdata->rx_q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt, + pdata->rx_max_q_count); - /* Get the device interrupt */ - ret = platform_get_irq(pdev, 0); - if (ret < 0) { - dev_err(dev, "platform_get_irq 0 failed\n"); - goto err_io; + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "TX/RX DMA channel count = %u/%u\n", + pdata->tx_ring_count, pdata->rx_ring_count); + dev_dbg(pdata->dev, "TX/RX hardware queue count = %u/%u\n", + pdata->tx_q_count, pdata->rx_q_count); } - pdata->dev_irq = ret; +} - /* Get the auto-negotiation interrupt */ - ret = platform_get_irq(phy_pdev, phy_irqnum++); - if (ret < 0) { - dev_err(dev, "platform_get_irq phy 0 failed\n"); - goto err_io; - } - pdata->an_irq = ret; +int xgbe_config_netdev(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; + struct device *dev = pdata->dev; + unsigned int i; + int ret; netdev->irq = pdata->dev_irq; netdev->base_addr = (unsigned long)pdata->xgmac_regs; memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len); - /* Set all the function pointers */ - xgbe_init_all_fptrs(pdata); + /* Initialize ECC timestamps */ + pdata->tx_sec_period = jiffies; + pdata->tx_ded_period = jiffies; + pdata->rx_sec_period = jiffies; + pdata->rx_ded_period = jiffies; + pdata->desc_sec_period = jiffies; + pdata->desc_ded_period = jiffies; /* Issue software reset to device */ pdata->hw_if.exit(pdata); - /* Populate the hardware features */ - xgbe_get_all_hw_features(pdata); - /* Set default configuration data */ xgbe_default_config(pdata); @@ -663,33 +287,46 @@ static int xgbe_probe(struct platform_device *pdev) DMA_BIT_MASK(pdata->hw_feat.dma_width)); if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed\n"); - goto err_io; + return ret; } - /* Calculate the number of Tx and Rx rings to be created - * -Tx (DMA) Channels map 1-to-1 to Tx Queues so set - * the number of Tx queues to the number of Tx channels - * enabled - * -Rx (DMA) Channels do not map 1-to-1 so use the actual - * number of Rx queues - */ - pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(), - pdata->hw_feat.tx_ch_cnt); - pdata->tx_q_count = pdata->tx_ring_count; + /* Set default max values if not provided */ + if (!pdata->tx_max_fifo_size) + pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size; + if (!pdata->rx_max_fifo_size) + pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size; + + /* Set and validate the number of descriptors for a ring */ + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); + pdata->tx_desc_count = XGBE_TX_DESC_CNT; + + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT); + pdata->rx_desc_count = XGBE_RX_DESC_CNT; + + /* Adjust the number of queues based on interrupts assigned */ + if (pdata->channel_irq_count) { + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->channel_irq_count); + pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, + pdata->channel_irq_count); + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, + "adjusted TX/RX DMA channel count = %u/%u\n", + pdata->tx_ring_count, pdata->rx_ring_count); + } + + /* Set the number of queues */ ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count); if (ret) { dev_err(dev, "error setting real tx queue count\n"); - goto err_io; + return ret; } - pdata->rx_ring_count = min_t(unsigned int, - netif_get_num_default_rss_queues(), - pdata->hw_feat.rx_ch_cnt); - pdata->rx_q_count = pdata->hw_feat.rx_q_cnt; ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count); if (ret) { dev_err(dev, "error setting real rx queue count\n"); - goto err_io; + return ret; } /* Initialize RSS hash key and lookup table */ @@ -704,7 +341,9 @@ static int xgbe_probe(struct platform_device *pdev) XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Call MDIO/PHY initialization routine */ - pdata->phy_if.phy_init(pdata); + ret = pdata->phy_if.phy_init(pdata); + if (ret) + return ret; /* Set device operations */ netdev->netdev_ops = xgbe_get_netdev_ops(); @@ -738,6 +377,8 @@ static int xgbe_probe(struct platform_device *pdev) pdata->netdev_features = netdev->features; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->min_mtu = 0; + netdev->max_mtu = XGMAC_JUMBO_PACKET_MTU; /* Use default watchdog timeout */ netdev->watchdog_timeo = 0; @@ -749,13 +390,21 @@ static int xgbe_probe(struct platform_device *pdev) ret = register_netdev(netdev); if (ret) { dev_err(dev, "net device registration failed\n"); - goto err_io; + return ret; } /* Create the PHY/ANEG name based on netdev name */ snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs", netdev_name(netdev)); + /* Create the ECC name based on netdev name */ + snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc", + netdev_name(netdev)); + + /* Create the I2C name based on netdev name */ + snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c", + netdev_name(netdev)); + /* Create workqueues */ pdata->dev_workqueue = create_singlethread_workqueue(netdev_name(netdev)); @@ -777,11 +426,10 @@ static int xgbe_probe(struct platform_device *pdev) xgbe_debugfs_init(pdata); - platform_device_put(phy_pdev); - - netdev_notice(netdev, "net device enabled\n"); - - DBGPR("<-- xgbe_probe\n"); + netif_dbg(pdata, drv, pdata->netdev, "%u Tx software queues\n", + pdata->tx_ring_count); + netif_dbg(pdata, drv, pdata->netdev, "%u Rx software queues\n", + pdata->rx_ring_count); return 0; @@ -791,29 +439,19 @@ err_wq: err_netdev: unregister_netdev(netdev); -err_io: - platform_device_put(phy_pdev); - -err_phydev: - free_netdev(netdev); - -err_alloc: - dev_notice(dev, "net device not enabled\n"); - return ret; } -static int xgbe_remove(struct platform_device *pdev) +void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata) { - struct net_device *netdev = platform_get_drvdata(pdev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - - DBGPR("-->xgbe_remove\n"); + struct net_device *netdev = pdata->netdev; xgbe_debugfs_exit(pdata); xgbe_ptp_unregister(pdata); + pdata->phy_if.phy_exit(pdata); + flush_workqueue(pdata->an_workqueue); destroy_workqueue(pdata->an_workqueue); @@ -821,94 +459,29 @@ static int xgbe_remove(struct platform_device *pdev) destroy_workqueue(pdata->dev_workqueue); unregister_netdev(netdev); - - free_netdev(netdev); - - DBGPR("<--xgbe_remove\n"); - - return 0; } -#ifdef CONFIG_PM -static int xgbe_suspend(struct device *dev) +static int __init xgbe_mod_init(void) { - struct net_device *netdev = dev_get_drvdata(dev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret = 0; - - DBGPR("-->xgbe_suspend\n"); - - if (netif_running(netdev)) - ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + int ret; - pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + ret = xgbe_platform_init(); + if (ret) + return ret; - DBGPR("<--xgbe_suspend\n"); + ret = xgbe_pci_init(); + if (ret) + return ret; - return ret; + return 0; } -static int xgbe_resume(struct device *dev) +static void __exit xgbe_mod_exit(void) { - struct net_device *netdev = dev_get_drvdata(dev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret = 0; - - DBGPR("-->xgbe_resume\n"); - - pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); - - if (netif_running(netdev)) { - ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); - - /* Schedule a restart in case the link or phy state changed - * while we were powered down. - */ - schedule_work(&pdata->restart_work); - } - - DBGPR("<--xgbe_resume\n"); + xgbe_pci_exit(); - return ret; + xgbe_platform_exit(); } -#endif /* CONFIG_PM */ - -#ifdef CONFIG_ACPI -static const struct acpi_device_id xgbe_acpi_match[] = { - { "AMDI8001", 0 }, - {}, -}; - -MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match); -#endif - -#ifdef CONFIG_OF -static const struct of_device_id xgbe_of_match[] = { - { .compatible = "amd,xgbe-seattle-v1a", }, - {}, -}; - -MODULE_DEVICE_TABLE(of, xgbe_of_match); -#endif - -static SIMPLE_DEV_PM_OPS(xgbe_pm_ops, xgbe_suspend, xgbe_resume); - -static struct platform_driver xgbe_driver = { - .driver = { - .name = "amd-xgbe", -#ifdef CONFIG_ACPI - .acpi_match_table = xgbe_acpi_match, -#endif -#ifdef CONFIG_OF - .of_match_table = xgbe_of_match, -#endif - .pm = &xgbe_pm_ops, - }, - .probe = xgbe_probe, - .remove = xgbe_remove, -}; -module_platform_driver(xgbe_driver); +module_init(xgbe_mod_init); +module_exit(xgbe_mod_exit); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 84c5d296d13e..0ecae7045044 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -125,303 +125,284 @@ #include "xgbe.h" #include "xgbe-common.h" -static void xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata) +static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); + int reg; - reg |= XGBE_KR_TRAINING_ENABLE; - XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT); + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg); } -static void xgbe_an_disable_kr_training(struct xgbe_prv_data *pdata) +static void xgbe_an37_disable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; + int reg; - reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); - reg &= ~XGBE_KR_TRAINING_ENABLE; - XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL); + reg &= ~XGBE_PCS_CL37_BP; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg); } -static void xgbe_pcs_power_cycle(struct xgbe_prv_data *pdata) +static void xgbe_an37_enable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + int reg; - reg |= MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL); + reg |= XGBE_PCS_CL37_BP; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg); - usleep_range(75, 100); - - reg &= ~MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg |= XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); } -static void xgbe_serdes_start_ratechange(struct xgbe_prv_data *pdata) +static void xgbe_an73_clear_interrupts(struct xgbe_prv_data *pdata) { - /* Assert Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); } -static void xgbe_serdes_complete_ratechange(struct xgbe_prv_data *pdata) +static void xgbe_an73_disable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int wait; - u16 status; - - /* Release Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); +} - /* Wait for Rx and Tx ready */ - wait = XGBE_RATECHANGE_COUNT; - while (wait--) { - usleep_range(50, 75); +static void xgbe_an73_enable_interrupts(struct xgbe_prv_data *pdata) +{ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK); +} - status = XSIR0_IOREAD(pdata, SIR0_STATUS); - if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && - XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) - goto rx_reset; +static void xgbe_an_enable_interrupts(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_enable_interrupts(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_enable_interrupts(pdata); + break; + default: + break; } +} - netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n", - status); - -rx_reset: - /* Perform Rx reset for the DFE changes */ - XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1); +static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata) +{ + xgbe_an73_clear_interrupts(pdata); + xgbe_an37_clear_interrupts(pdata); } -static void xgbe_xgmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata) { unsigned int reg; - /* Enable KR training */ - xgbe_an_enable_kr_training(pdata); - - /* Set MAC to 10G speed */ - pdata->hw_if.set_xgmii_speed(pdata); - - /* Set PCS to KR/10G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBR; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED10G; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg |= XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); +} - xgbe_pcs_power_cycle(pdata); +static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata) +{ + unsigned int reg; - /* Set SerDes to 10G speed */ - xgbe_serdes_start_ratechange(pdata); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL); + reg &= ~XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_10000]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_10000]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_10000]); +static void xgbe_kr_mode(struct xgbe_prv_data *pdata) +{ + /* Enable KR training */ + xgbe_an73_enable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* Set MAC to 10G speed */ + pdata->hw_if.set_speed(pdata, SPEED_10000); - netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KR); } -static void xgbe_gmii_2500_mode(struct xgbe_prv_data *pdata) +static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - /* Disable KR training */ - xgbe_an_disable_kr_training(pdata); + xgbe_an73_disable_kr_training(pdata); /* Set MAC to 2.5G speed */ - pdata->hw_if.set_gmii_2500_speed(pdata); + pdata->hw_if.set_speed(pdata, SPEED_2500); - /* Set PCS to KX/1G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBX; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_2500); +} - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED1G; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); +static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_pcs_power_cycle(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - /* Set SerDes to 2.5G speed */ - xgbe_serdes_start_ratechange(pdata); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_1000); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL); +static void xgbe_sfi_mode(struct xgbe_prv_data *pdata) +{ + /* If a KR re-driver is present, change to KR mode instead */ + if (pdata->kr_redrv) + return xgbe_kr_mode(pdata); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_2500]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_2500]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_2500]); + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* Set MAC to 10G speed */ + pdata->hw_if.set_speed(pdata, SPEED_10000); - netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SFI); } -static void xgbe_gmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_x_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - /* Disable KR training */ - xgbe_an_disable_kr_training(pdata); + xgbe_an73_disable_kr_training(pdata); /* Set MAC to 1G speed */ - pdata->hw_if.set_gmii_speed(pdata); + pdata->hw_if.set_speed(pdata, SPEED_1000); - /* Set PCS to KX/1G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBX; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED1G; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_X); +} - xgbe_pcs_power_cycle(pdata); +static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - /* Set SerDes to 1G speed */ - xgbe_serdes_start_ratechange(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_1000); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_1000]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_1000]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_1000]); +static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_100); } -static void xgbe_cur_mode(struct xgbe_prv_data *pdata, - enum xgbe_mode *mode) +static enum xgbe_mode xgbe_cur_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - if ((reg & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR) - *mode = XGBE_MODE_KR; - else - *mode = XGBE_MODE_KX; + return pdata->phy_if.phy_impl.cur_mode(pdata); } static bool xgbe_in_kr_mode(struct xgbe_prv_data *pdata) { - enum xgbe_mode mode; - - xgbe_cur_mode(pdata, &mode); + return (xgbe_cur_mode(pdata) == XGBE_MODE_KR); +} - return (mode == XGBE_MODE_KR); +static void xgbe_change_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_kr_mode(pdata); + break; + case XGBE_MODE_SGMII_100: + xgbe_sgmii_100_mode(pdata); + break; + case XGBE_MODE_SGMII_1000: + xgbe_sgmii_1000_mode(pdata); + break; + case XGBE_MODE_X: + xgbe_x_mode(pdata); + break; + case XGBE_MODE_SFI: + xgbe_sfi_mode(pdata); + break; + case XGBE_MODE_UNKNOWN: + break; + default: + netif_dbg(pdata, link, pdata->netdev, + "invalid operation mode requested (%u)\n", mode); + } } static void xgbe_switch_mode(struct xgbe_prv_data *pdata) { - /* If we are in KR switch to KX, and vice-versa */ - if (xgbe_in_kr_mode(pdata)) { - if (pdata->speed_set == XGBE_SPEEDSET_1000_10000) - xgbe_gmii_mode(pdata); - else - xgbe_gmii_2500_mode(pdata); - } else { - xgbe_xgmii_mode(pdata); - } + xgbe_change_mode(pdata, pdata->phy_if.phy_impl.switch_mode(pdata)); } static void xgbe_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { - enum xgbe_mode cur_mode; + if (mode == xgbe_cur_mode(pdata)) + return; - xgbe_cur_mode(pdata, &cur_mode); - if (mode != cur_mode) - xgbe_switch_mode(pdata); + xgbe_change_mode(pdata, mode); } -static bool xgbe_use_xgmii_mode(struct xgbe_prv_data *pdata) +static bool xgbe_use_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_10000) - return true; - } + return pdata->phy_if.phy_impl.use_mode(pdata, mode); +} + +static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, + bool restart) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_CTRL1); + reg &= ~MDIO_VEND2_CTRL1_AN_ENABLE; - return false; + if (enable) + reg |= MDIO_VEND2_CTRL1_AN_ENABLE; + + if (restart) + reg |= MDIO_VEND2_CTRL1_AN_RESTART; + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); } -static bool xgbe_use_gmii_2500_mode(struct xgbe_prv_data *pdata) +static void xgbe_an37_restart(struct xgbe_prv_data *pdata) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_2500baseX_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_2500) - return true; - } + xgbe_an37_enable_interrupts(pdata); + xgbe_an37_set(pdata, true, true); - return false; + netif_dbg(pdata, link, pdata->netdev, "CL37 AN enabled/restarted\n"); } -static bool xgbe_use_gmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_an37_disable(struct xgbe_prv_data *pdata) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_1000) - return true; - } + xgbe_an37_set(pdata, false, false); + xgbe_an37_disable_interrupts(pdata); - return false; + netif_dbg(pdata, link, pdata->netdev, "CL37 AN disabled\n"); } -static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart) +static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable, + bool restart) { unsigned int reg; @@ -437,22 +418,62 @@ static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart) XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); } -static void xgbe_restart_an(struct xgbe_prv_data *pdata) +static void xgbe_an73_restart(struct xgbe_prv_data *pdata) +{ + xgbe_an73_enable_interrupts(pdata); + xgbe_an73_set(pdata, true, true); + + netif_dbg(pdata, link, pdata->netdev, "CL73 AN enabled/restarted\n"); +} + +static void xgbe_an73_disable(struct xgbe_prv_data *pdata) { - xgbe_set_an(pdata, true, true); + xgbe_an73_set(pdata, false, false); + xgbe_an73_disable_interrupts(pdata); - netif_dbg(pdata, link, pdata->netdev, "AN enabled/restarted\n"); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n"); +} + +static void xgbe_an_restart(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_restart(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_restart(pdata); + break; + default: + break; + } } -static void xgbe_disable_an(struct xgbe_prv_data *pdata) +static void xgbe_an_disable(struct xgbe_prv_data *pdata) { - xgbe_set_an(pdata, false, false); + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_disable(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_disable(pdata); + break; + default: + break; + } +} - netif_dbg(pdata, link, pdata->netdev, "AN disabled\n"); +static void xgbe_an_disable_all(struct xgbe_prv_data *pdata) +{ + xgbe_an73_disable(pdata); + xgbe_an37_disable(pdata); } -static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int ad_reg, lp_reg, reg; @@ -476,13 +497,15 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, /* Start KR training */ reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); if (reg & XGBE_KR_TRAINING_ENABLE) { - XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1); + if (pdata->phy_if.phy_impl.kr_training_pre) + pdata->phy_if.phy_impl.kr_training_pre(pdata); reg |= XGBE_KR_TRAINING_START; XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); - XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0); + if (pdata->phy_if.phy_impl.kr_training_post) + pdata->phy_if.phy_impl.kr_training_post(pdata); netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); @@ -491,8 +514,8 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, return XGBE_AN_PAGE_RECEIVED; } -static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_tx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { u16 msg; @@ -508,8 +531,8 @@ static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata, return XGBE_AN_PAGE_RECEIVED; } -static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_rx_bpa(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int link_support; unsigned int reg, ad_reg, lp_reg; @@ -528,12 +551,12 @@ static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata, return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || (lp_reg & XGBE_XNP_NP_EXCHANGE)) - ? xgbe_an_tx_xnp(pdata, state) - : xgbe_an_tx_training(pdata, state); + ? xgbe_an73_tx_xnp(pdata, state) + : xgbe_an73_tx_training(pdata, state); } -static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_rx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int ad_reg, lp_reg; @@ -543,11 +566,11 @@ static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata, return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || (lp_reg & XGBE_XNP_NP_EXCHANGE)) - ? xgbe_an_tx_xnp(pdata, state) - : xgbe_an_tx_training(pdata, state); + ? xgbe_an73_tx_xnp(pdata, state) + : xgbe_an73_tx_training(pdata, state); } -static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) +static enum xgbe_an xgbe_an73_page_received(struct xgbe_prv_data *pdata) { enum xgbe_rx *state; unsigned long an_timeout; @@ -566,20 +589,20 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) pdata->an_start = jiffies; netif_dbg(pdata, link, pdata->netdev, - "AN timed out, resetting state\n"); + "CL73 AN timed out, resetting state\n"); } } state = xgbe_in_kr_mode(pdata) ? &pdata->kr_state - : &pdata->kx_state; + : &pdata->kx_state; switch (*state) { case XGBE_RX_BPA: - ret = xgbe_an_rx_bpa(pdata, state); + ret = xgbe_an73_rx_bpa(pdata, state); break; case XGBE_RX_XNP: - ret = xgbe_an_rx_xnp(pdata, state); + ret = xgbe_an73_rx_xnp(pdata, state); break; default: @@ -589,7 +612,7 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) return ret; } -static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata) +static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) { /* Be sure we aren't looping trying to negotiate */ if (xgbe_in_kr_mode(pdata)) { @@ -611,23 +634,43 @@ static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata) return XGBE_AN_NO_LINK; } - xgbe_disable_an(pdata); + xgbe_an73_disable(pdata); xgbe_switch_mode(pdata); - xgbe_restart_an(pdata); + xgbe_an73_restart(pdata); return XGBE_AN_INCOMPAT_LINK; } -static irqreturn_t xgbe_an_isr(int irq, void *data) +static void xgbe_an37_isr(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + unsigned int reg; - netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); + /* Disable AN interrupts */ + xgbe_an37_disable_interrupts(pdata); + + /* Save the interrupt(s) that fired */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT); + pdata->an_int = reg & XGBE_AN_CL37_INT_MASK; + pdata->an_status = reg & ~XGBE_AN_CL37_INT_MASK; + if (pdata->an_int) { + /* Clear the interrupt(s) that fired and process them */ + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg); + + queue_work(pdata->an_workqueue, &pdata->an_irq_work); + } else { + /* Enable AN interrupts */ + xgbe_an37_enable_interrupts(pdata); + } +} + +static void xgbe_an73_isr(struct xgbe_prv_data *pdata) +{ /* Disable AN interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + xgbe_an73_disable_interrupts(pdata); /* Save the interrupt(s) that fired */ pdata->an_int = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT); @@ -639,13 +682,37 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) queue_work(pdata->an_workqueue, &pdata->an_irq_work); } else { /* Enable AN interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, - XGBE_AN_INT_MASK); + xgbe_an73_enable_interrupts(pdata); + } +} + +static irqreturn_t xgbe_an_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_isr(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_isr(pdata); + break; + default: + break; } return IRQ_HANDLED; } +static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata) +{ + return xgbe_an_isr(irq, pdata); +} + static void xgbe_an_irq_work(struct work_struct *work) { struct xgbe_prv_data *pdata = container_of(work, @@ -679,36 +746,87 @@ static const char *xgbe_state_as_string(enum xgbe_an state) } } -static void xgbe_an_state_machine(struct work_struct *work) +static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = container_of(work, - struct xgbe_prv_data, - an_work); enum xgbe_an cur_state = pdata->an_state; - mutex_lock(&pdata->an_mutex); + if (!pdata->an_int) + return; + + if (pdata->an_int & XGBE_AN_CL37_INT_CMPLT) { + pdata->an_state = XGBE_AN_COMPLETE; + pdata->an_int &= ~XGBE_AN_CL37_INT_CMPLT; + + /* If SGMII is enabled, check the link status */ + if ((pdata->an_mode == XGBE_AN_MODE_CL37_SGMII) && + !(pdata->an_status & XGBE_SGMII_AN_LINK_STATUS)) + pdata->an_state = XGBE_AN_NO_LINK; + } + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN %s\n", + xgbe_state_as_string(pdata->an_state)); + + cur_state = pdata->an_state; + + switch (pdata->an_state) { + case XGBE_AN_READY: + break; + + case XGBE_AN_COMPLETE: + netif_dbg(pdata, link, pdata->netdev, + "Auto negotiation successful\n"); + break; + + case XGBE_AN_NO_LINK: + break; + + default: + pdata->an_state = XGBE_AN_ERROR; + } + + if (pdata->an_state == XGBE_AN_ERROR) { + netdev_err(pdata->netdev, + "error during auto-negotiation, state=%u\n", + cur_state); + + pdata->an_int = 0; + xgbe_an37_clear_interrupts(pdata); + } + + if (pdata->an_state >= XGBE_AN_COMPLETE) { + pdata->an_result = pdata->an_state; + pdata->an_state = XGBE_AN_READY; + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n", + xgbe_state_as_string(pdata->an_result)); + } + + xgbe_an37_enable_interrupts(pdata); +} + +static void xgbe_an73_state_machine(struct xgbe_prv_data *pdata) +{ + enum xgbe_an cur_state = pdata->an_state; if (!pdata->an_int) - goto out; + return; next_int: - if (pdata->an_int & XGBE_AN_PG_RCV) { + if (pdata->an_int & XGBE_AN_CL73_PG_RCV) { pdata->an_state = XGBE_AN_PAGE_RECEIVED; - pdata->an_int &= ~XGBE_AN_PG_RCV; - } else if (pdata->an_int & XGBE_AN_INC_LINK) { + pdata->an_int &= ~XGBE_AN_CL73_PG_RCV; + } else if (pdata->an_int & XGBE_AN_CL73_INC_LINK) { pdata->an_state = XGBE_AN_INCOMPAT_LINK; - pdata->an_int &= ~XGBE_AN_INC_LINK; - } else if (pdata->an_int & XGBE_AN_INT_CMPLT) { + pdata->an_int &= ~XGBE_AN_CL73_INC_LINK; + } else if (pdata->an_int & XGBE_AN_CL73_INT_CMPLT) { pdata->an_state = XGBE_AN_COMPLETE; - pdata->an_int &= ~XGBE_AN_INT_CMPLT; + pdata->an_int &= ~XGBE_AN_CL73_INT_CMPLT; } else { pdata->an_state = XGBE_AN_ERROR; } - pdata->an_result = pdata->an_state; - again: - netif_dbg(pdata, link, pdata->netdev, "AN %s\n", + netif_dbg(pdata, link, pdata->netdev, "CL73 AN %s\n", xgbe_state_as_string(pdata->an_state)); cur_state = pdata->an_state; @@ -719,14 +837,14 @@ again: break; case XGBE_AN_PAGE_RECEIVED: - pdata->an_state = xgbe_an_page_received(pdata); + pdata->an_state = xgbe_an73_page_received(pdata); pdata->an_supported++; break; case XGBE_AN_INCOMPAT_LINK: pdata->an_supported = 0; pdata->parallel_detect = 0; - pdata->an_state = xgbe_an_incompat_link(pdata); + pdata->an_state = xgbe_an73_incompat_link(pdata); break; case XGBE_AN_COMPLETE: @@ -745,14 +863,14 @@ again: if (pdata->an_state == XGBE_AN_NO_LINK) { pdata->an_int = 0; - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an73_clear_interrupts(pdata); } else if (pdata->an_state == XGBE_AN_ERROR) { netdev_err(pdata->netdev, "error during auto-negotiation, state=%u\n", cur_state); pdata->an_int = 0; - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an73_clear_interrupts(pdata); } if (pdata->an_state >= XGBE_AN_COMPLETE) { @@ -762,7 +880,7 @@ again: pdata->kx_state = XGBE_RX_BPA; pdata->an_start = 0; - netif_dbg(pdata, link, pdata->netdev, "AN result: %s\n", + netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } @@ -772,20 +890,88 @@ again: if (pdata->an_int) goto next_int; -out: - /* Enable AN interrupts on the way out */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_INT_MASK); + xgbe_an73_enable_interrupts(pdata); +} + +static void xgbe_an_state_machine(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + an_work); + + mutex_lock(&pdata->an_mutex); + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_state_machine(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_state_machine(pdata); + break; + default: + break; + } mutex_unlock(&pdata->an_mutex); } -static void xgbe_an_init(struct xgbe_prv_data *pdata) +static void xgbe_an37_init(struct xgbe_prv_data *pdata) { - unsigned int reg; + unsigned int advertising, reg; + + advertising = pdata->phy_if.phy_impl.an_advertising(pdata); + + /* Set up Advertisement register */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE); + if (advertising & ADVERTISED_Pause) + reg |= 0x100; + else + reg &= ~0x100; + + if (advertising & ADVERTISED_Asym_Pause) + reg |= 0x80; + else + reg &= ~0x80; + + /* Full duplex, but not half */ + reg |= XGBE_AN_CL37_FD_MASK; + reg &= ~XGBE_AN_CL37_HD_MASK; + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE, reg); + + /* Set up the Control register */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg &= XGBE_AN_CL37_TX_CONFIG_MASK; + reg &= XGBE_AN_CL37_PCS_MODE_MASK; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL37: + reg |= XGBE_AN_CL37_PCS_MODE_BASEX; + break; + case XGBE_AN_MODE_CL37_SGMII: + reg |= XGBE_AN_CL37_PCS_MODE_SGMII; + break; + default: + break; + } + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", + (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); +} + +static void xgbe_an73_init(struct xgbe_prv_data *pdata) +{ + unsigned int advertising, reg; + + advertising = pdata->phy_if.phy_impl.an_advertising(pdata); /* Set up Advertisement register 3 first */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - if (pdata->phy.advertising & ADVERTISED_10000baseR_FEC) + if (advertising & ADVERTISED_10000baseR_FEC) reg |= 0xc000; else reg &= ~0xc000; @@ -794,13 +980,13 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) /* Set up Advertisement register 2 next */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) + if (advertising & ADVERTISED_10000baseKR_Full) reg |= 0x80; else reg &= ~0x80; - if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || - (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) + if ((advertising & ADVERTISED_1000baseKX_Full) || + (advertising & ADVERTISED_2500baseX_Full)) reg |= 0x20; else reg &= ~0x20; @@ -809,12 +995,12 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) /* Set up Advertisement register 1 last */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - if (pdata->phy.advertising & ADVERTISED_Pause) + if (advertising & ADVERTISED_Pause) reg |= 0x400; else reg &= ~0x400; - if (pdata->phy.advertising & ADVERTISED_Asym_Pause) + if (advertising & ADVERTISED_Asym_Pause) reg |= 0x800; else reg &= ~0x800; @@ -824,7 +1010,25 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg); - netif_dbg(pdata, link, pdata->netdev, "AN initialized\n"); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN initialized\n"); +} + +static void xgbe_an_init(struct xgbe_prv_data *pdata) +{ + /* Set up advertisement registers based on current settings */ + pdata->an_mode = pdata->phy_if.phy_impl.an_mode(pdata); + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_init(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_init(pdata); + break; + default: + break; + } } static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) @@ -842,6 +1046,8 @@ static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) static const char *xgbe_phy_speed_string(int speed) { switch (speed) { + case SPEED_100: + return "100Mbps"; case SPEED_1000: return "1Gbps"; case SPEED_2500: @@ -907,24 +1113,32 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) xgbe_phy_print_status(pdata); } +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + return pdata->phy_if.phy_impl.valid_speed(pdata, speed); +} + static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) { + enum xgbe_mode mode; + netif_dbg(pdata, link, pdata->netdev, "fixed PHY configuration\n"); /* Disable auto-negotiation */ - xgbe_disable_an(pdata); - - /* Validate/Set specified speed */ - switch (pdata->phy.speed) { - case SPEED_10000: - xgbe_set_mode(pdata, XGBE_MODE_KR); + xgbe_an_disable(pdata); + + /* Set specified mode for specified speed */ + mode = pdata->phy_if.phy_impl.get_mode(pdata, pdata->phy.speed); + switch (mode) { + case XGBE_MODE_KX_1000: + case XGBE_MODE_KX_2500: + case XGBE_MODE_KR: + case XGBE_MODE_SGMII_100: + case XGBE_MODE_SGMII_1000: + case XGBE_MODE_X: + case XGBE_MODE_SFI: break; - - case SPEED_2500: - case SPEED_1000: - xgbe_set_mode(pdata, XGBE_MODE_KX); - break; - + case XGBE_MODE_UNKNOWN: default: return -EINVAL; } @@ -933,38 +1147,60 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) if (pdata->phy.duplex != DUPLEX_FULL) return -EINVAL; + xgbe_set_mode(pdata, mode); + return 0; } static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) { + int ret; + set_bit(XGBE_LINK_INIT, &pdata->dev_state); pdata->link_check = jiffies; - if (pdata->phy.autoneg != AUTONEG_ENABLE) - return xgbe_phy_config_fixed(pdata); + ret = pdata->phy_if.phy_impl.an_config(pdata); + if (ret) + return ret; + + if (pdata->phy.autoneg != AUTONEG_ENABLE) { + ret = xgbe_phy_config_fixed(pdata); + if (ret || !pdata->kr_redrv) + return ret; - netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n"); + netif_dbg(pdata, link, pdata->netdev, "AN redriver support\n"); + } else { + netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n"); + } /* Disable auto-negotiation interrupt */ disable_irq(pdata->an_irq); /* Start auto-negotiation in a supported mode */ - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) { + if (xgbe_use_mode(pdata, XGBE_MODE_KR)) { xgbe_set_mode(pdata, XGBE_MODE_KR); - } else if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || - (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) { - xgbe_set_mode(pdata, XGBE_MODE_KX); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) { + xgbe_set_mode(pdata, XGBE_MODE_KX_2500); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) { + xgbe_set_mode(pdata, XGBE_MODE_KX_1000); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) { + xgbe_set_mode(pdata, XGBE_MODE_SFI); + } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) { + xgbe_set_mode(pdata, XGBE_MODE_X); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) { + xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { + xgbe_set_mode(pdata, XGBE_MODE_SGMII_100); } else { enable_irq(pdata->an_irq); return -EINVAL; } /* Disable and stop any in progress auto-negotiation */ - xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); /* Clear any auto-negotitation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an_clear_interrupts_all(pdata); pdata->an_result = XGBE_AN_READY; pdata->an_state = XGBE_AN_READY; @@ -974,11 +1210,8 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) /* Re-enable auto-negotiation interrupt */ enable_irq(pdata->an_irq); - /* Set up advertisement registers based on current settings */ xgbe_an_init(pdata); - - /* Enable and start auto-negotiation */ - xgbe_restart_an(pdata); + xgbe_an_restart(pdata); return 0; } @@ -1016,108 +1249,52 @@ static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) } } -static void xgbe_phy_status_force(struct xgbe_prv_data *pdata) +static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) { - if (xgbe_in_kr_mode(pdata)) { - pdata->phy.speed = SPEED_10000; - } else { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.speed = SPEED_1000; - break; - - case XGBE_SPEEDSET_2500_10000: - pdata->phy.speed = SPEED_2500; - break; - } - } - pdata->phy.duplex = DUPLEX_FULL; + return pdata->phy_if.phy_impl.an_outcome(pdata); } -static void xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) +static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) { - unsigned int ad_reg, lp_reg; + enum xgbe_mode mode; pdata->phy.lp_advertising = 0; if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect) - return xgbe_phy_status_force(pdata); - - pdata->phy.lp_advertising |= ADVERTISED_Autoneg; - pdata->phy.lp_advertising |= ADVERTISED_Backplane; - - /* Compare Advertisement and Link Partner register 1 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); - if (lp_reg & 0x400) - pdata->phy.lp_advertising |= ADVERTISED_Pause; - if (lp_reg & 0x800) - pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; - - if (pdata->phy.pause_autoneg) { - /* Set flow control based on auto-negotiation result */ - pdata->phy.tx_pause = 0; - pdata->phy.rx_pause = 0; - - if (ad_reg & lp_reg & 0x400) { - pdata->phy.tx_pause = 1; - pdata->phy.rx_pause = 1; - } else if (ad_reg & lp_reg & 0x800) { - if (ad_reg & 0x400) - pdata->phy.rx_pause = 1; - else if (lp_reg & 0x400) - pdata->phy.tx_pause = 1; - } - } - - /* Compare Advertisement and Link Partner register 2 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); - if (lp_reg & 0x80) - pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; - if (lp_reg & 0x20) { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; - break; - case XGBE_SPEEDSET_2500_10000: - pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full; - break; - } - } + mode = xgbe_cur_mode(pdata); + else + mode = xgbe_phy_status_aneg(pdata); - ad_reg &= lp_reg; - if (ad_reg & 0x80) { + switch (mode) { + case XGBE_MODE_SGMII_100: + pdata->phy.speed = SPEED_100; + break; + case XGBE_MODE_X: + case XGBE_MODE_KX_1000: + case XGBE_MODE_SGMII_1000: + pdata->phy.speed = SPEED_1000; + break; + case XGBE_MODE_KX_2500: + pdata->phy.speed = SPEED_2500; + break; + case XGBE_MODE_KR: + case XGBE_MODE_SFI: pdata->phy.speed = SPEED_10000; - xgbe_set_mode(pdata, XGBE_MODE_KR); - } else if (ad_reg & 0x20) { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.speed = SPEED_1000; - break; - - case XGBE_SPEEDSET_2500_10000: - pdata->phy.speed = SPEED_2500; - break; - } - - xgbe_set_mode(pdata, XGBE_MODE_KX); - } else { + break; + case XGBE_MODE_UNKNOWN: + default: pdata->phy.speed = SPEED_UNKNOWN; } - /* Compare Advertisement and Link Partner register 3 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); - if (lp_reg & 0xc000) - pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; - pdata->phy.duplex = DUPLEX_FULL; + + xgbe_set_mode(pdata, mode); } static void xgbe_phy_status(struct xgbe_prv_data *pdata) { - unsigned int reg, link_aneg; + unsigned int link_aneg; + int an_restart; if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) { netif_carrier_off(pdata->netdev); @@ -1128,12 +1305,12 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE); - /* Get the link status. Link status is latched low, so read - * once to clear and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - pdata->phy.link = (reg & MDIO_STAT1_LSTATUS) ? 1 : 0; + pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, + &an_restart); + if (an_restart) { + xgbe_phy_config_aneg(pdata); + return; + } if (pdata->phy.link) { if (link_aneg && !xgbe_phy_aneg_done(pdata)) { @@ -1141,7 +1318,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_aneg(pdata); + xgbe_phy_status_result(pdata); if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) clear_bit(XGBE_LINK_INIT, &pdata->dev_state); @@ -1155,7 +1332,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_aneg(pdata); + xgbe_phy_status_result(pdata); netif_carrier_off(pdata->netdev); } @@ -1168,13 +1345,19 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) { netif_dbg(pdata, link, pdata->netdev, "stopping PHY\n"); + if (!pdata->phy_started) + return; + + /* Indicate the PHY is down */ + pdata->phy_started = 0; + /* Disable auto-negotiation */ - xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); - /* Disable auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + if (pdata->dev_irq != pdata->an_irq) + devm_free_irq(pdata->dev, pdata->an_irq, pdata); - devm_free_irq(pdata->dev, pdata->an_irq, pdata); + pdata->phy_if.phy_impl.stop(pdata); pdata->phy.link = 0; netif_carrier_off(pdata->netdev); @@ -1189,64 +1372,74 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "starting PHY\n"); - ret = devm_request_irq(pdata->dev, pdata->an_irq, - xgbe_an_isr, 0, pdata->an_name, - pdata); - if (ret) { - netdev_err(netdev, "phy irq request failed\n"); + ret = pdata->phy_if.phy_impl.start(pdata); + if (ret) return ret; + + /* If we have a separate AN irq, enable it */ + if (pdata->dev_irq != pdata->an_irq) { + ret = devm_request_irq(pdata->dev, pdata->an_irq, + xgbe_an_isr, 0, pdata->an_name, + pdata); + if (ret) { + netdev_err(netdev, "phy irq request failed\n"); + goto err_stop; + } } /* Set initial mode - call the mode setting routines * directly to insure we are properly configured */ - if (xgbe_use_xgmii_mode(pdata)) { - xgbe_xgmii_mode(pdata); - } else if (xgbe_use_gmii_mode(pdata)) { - xgbe_gmii_mode(pdata); - } else if (xgbe_use_gmii_2500_mode(pdata)) { - xgbe_gmii_2500_mode(pdata); + if (xgbe_use_mode(pdata, XGBE_MODE_KR)) { + xgbe_kr_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) { + xgbe_kx_2500_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) { + xgbe_kx_1000_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) { + xgbe_sfi_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) { + xgbe_x_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) { + xgbe_sgmii_1000_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { + xgbe_sgmii_100_mode(pdata); } else { ret = -EINVAL; goto err_irq; } - /* Set up advertisement registers based on current settings */ - xgbe_an_init(pdata); + /* Indicate the PHY is up and running */ + pdata->phy_started = 1; - /* Enable auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07); + xgbe_an_init(pdata); + xgbe_an_enable_interrupts(pdata); return xgbe_phy_config_aneg(pdata); err_irq: - devm_free_irq(pdata->dev, pdata->an_irq, pdata); + if (pdata->dev_irq != pdata->an_irq) + devm_free_irq(pdata->dev, pdata->an_irq, pdata); + +err_stop: + pdata->phy_if.phy_impl.stop(pdata); return ret; } static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { - unsigned int count, reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg |= MDIO_CTRL1_RESET; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); - - count = 50; - do { - msleep(20); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - } while ((reg & MDIO_CTRL1_RESET) && --count); + int ret; - if (reg & MDIO_CTRL1_RESET) - return -ETIMEDOUT; + ret = pdata->phy_if.phy_impl.reset(pdata); + if (ret) + return ret; /* Disable auto-negotiation for now */ - xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); /* Clear auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an_clear_interrupts_all(pdata); return 0; } @@ -1257,74 +1450,96 @@ static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) dev_dbg(dev, "\n************* PHY Reg dump **********************\n"); - dev_dbg(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + dev_dbg(dev, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); - dev_dbg(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + dev_dbg(dev, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); - dev_dbg(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1, + dev_dbg(dev, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); - dev_dbg(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2, + dev_dbg(dev, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); - dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1, + dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); - dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2, + dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2)); - dev_dbg(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + dev_dbg(dev, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); - dev_dbg(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + dev_dbg(dev, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); - dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); - dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); - dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 2, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); - dev_dbg(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Completion Reg (%#06x) = %#06x\n", MDIO_AN_COMP_STAT, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); dev_dbg(dev, "\n*************************************************\n"); } -static void xgbe_phy_init(struct xgbe_prv_data *pdata) +static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata) { + if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) + return SPEED_10000; + else if (pdata->phy.advertising & ADVERTISED_10000baseT_Full) + return SPEED_10000; + else if (pdata->phy.advertising & ADVERTISED_2500baseX_Full) + return SPEED_2500; + else if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full) + return SPEED_1000; + else if (pdata->phy.advertising & ADVERTISED_1000baseT_Full) + return SPEED_1000; + else if (pdata->phy.advertising & ADVERTISED_100baseT_Full) + return SPEED_100; + + return SPEED_UNKNOWN; +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + xgbe_phy_stop(pdata); + + pdata->phy_if.phy_impl.exit(pdata); +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + int ret; + mutex_init(&pdata->an_mutex); INIT_WORK(&pdata->an_irq_work, xgbe_an_irq_work); INIT_WORK(&pdata->an_work, xgbe_an_state_machine); pdata->mdio_mmd = MDIO_MMD_PCS; - /* Initialize supported features */ - pdata->phy.supported = SUPPORTED_Autoneg; - pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - pdata->phy.supported |= SUPPORTED_Backplane; - pdata->phy.supported |= SUPPORTED_10000baseKR_Full; - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.supported |= SUPPORTED_1000baseKX_Full; - break; - case XGBE_SPEEDSET_2500_10000: - pdata->phy.supported |= SUPPORTED_2500baseX_Full; - break; - } - + /* Check for FEC support */ pdata->fec_ability = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECABLE); pdata->fec_ability &= (MDIO_PMA_10GBR_FECABLE_ABLE | MDIO_PMA_10GBR_FECABLE_ERRABLE); - if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) - pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + /* Setup the phy (including supported features) */ + ret = pdata->phy_if.phy_impl.init(pdata); + if (ret) + return ret; pdata->phy.advertising = pdata->phy.supported; pdata->phy.address = 0; - pdata->phy.autoneg = AUTONEG_ENABLE; - pdata->phy.speed = SPEED_UNKNOWN; - pdata->phy.duplex = DUPLEX_UNKNOWN; + if (pdata->phy.advertising & ADVERTISED_Autoneg) { + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + } else { + pdata->phy.autoneg = AUTONEG_DISABLE; + pdata->phy.speed = xgbe_phy_best_advertised_speed(pdata); + pdata->phy.duplex = DUPLEX_FULL; + } pdata->phy.link = 0; @@ -1346,11 +1561,14 @@ static void xgbe_phy_init(struct xgbe_prv_data *pdata) if (netif_msg_drv(pdata)) xgbe_dump_phy_registers(pdata); + + return 0; } void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if) { phy_if->phy_init = xgbe_phy_init; + phy_if->phy_exit = xgbe_phy_exit; phy_if->phy_reset = xgbe_phy_reset; phy_if->phy_start = xgbe_phy_start; @@ -1358,4 +1576,8 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if) phy_if->phy_status = xgbe_phy_status; phy_if->phy_config_aneg = xgbe_phy_config_aneg; + + phy_if->phy_valid_speed = xgbe_phy_valid_speed; + + phy_if->an_isr = xgbe_an_combined_isr; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c new file mode 100644 index 000000000000..e76b7f65b805 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -0,0 +1,529 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/log2.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +static int xgbe_config_msi(struct xgbe_prv_data *pdata) +{ + unsigned int msi_count; + unsigned int i, j; + int ret; + + msi_count = XGBE_MSIX_BASE_COUNT; + msi_count += max(pdata->rx_ring_count, + pdata->tx_ring_count); + msi_count = roundup_pow_of_two(msi_count); + + ret = pci_enable_msi_exact(pdata->pcidev, msi_count); + if (ret < 0) { + dev_info(pdata->dev, "MSI request for %u interrupts failed\n", + msi_count); + + ret = pci_enable_msi(pdata->pcidev); + if (ret < 0) { + dev_info(pdata->dev, "MSI enablement failed\n"); + return ret; + } + + msi_count = 1; + } + + pdata->irq_count = msi_count; + + pdata->dev_irq = pdata->pcidev->irq; + + if (msi_count > 1) { + pdata->ecc_irq = pdata->pcidev->irq + 1; + pdata->i2c_irq = pdata->pcidev->irq + 2; + pdata->an_irq = pdata->pcidev->irq + 3; + + for (i = XGBE_MSIX_BASE_COUNT, j = 0; + (i < msi_count) && (j < XGBE_MAX_DMA_CHANNELS); + i++, j++) + pdata->channel_irq[j] = pdata->pcidev->irq + i; + pdata->channel_irq_count = j; + + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; + } else { + pdata->ecc_irq = pdata->pcidev->irq; + pdata->i2c_irq = pdata->pcidev->irq; + pdata->an_irq = pdata->pcidev->irq; + } + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "MSI interrupts enabled\n"); + + return 0; +} + +static int xgbe_config_msix(struct xgbe_prv_data *pdata) +{ + unsigned int msix_count; + unsigned int i, j; + int ret; + + msix_count = XGBE_MSIX_BASE_COUNT; + msix_count += max(pdata->rx_ring_count, + pdata->tx_ring_count); + + pdata->msix_entries = devm_kcalloc(pdata->dev, msix_count, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!pdata->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_count; i++) + pdata->msix_entries[i].entry = i; + + ret = pci_enable_msix_range(pdata->pcidev, pdata->msix_entries, + XGBE_MSIX_MIN_COUNT, msix_count); + if (ret < 0) { + dev_info(pdata->dev, "MSI-X enablement failed\n"); + devm_kfree(pdata->dev, pdata->msix_entries); + pdata->msix_entries = NULL; + return ret; + } + + pdata->irq_count = ret; + + pdata->dev_irq = pdata->msix_entries[0].vector; + pdata->ecc_irq = pdata->msix_entries[1].vector; + pdata->i2c_irq = pdata->msix_entries[2].vector; + pdata->an_irq = pdata->msix_entries[3].vector; + + for (i = XGBE_MSIX_BASE_COUNT, j = 0; i < ret; i++, j++) + pdata->channel_irq[j] = pdata->msix_entries[i].vector; + pdata->channel_irq_count = j; + + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "MSI-X interrupts enabled\n"); + + return 0; +} + +static int xgbe_config_irqs(struct xgbe_prv_data *pdata) +{ + int ret; + + ret = xgbe_config_msix(pdata); + if (!ret) + goto out; + + ret = xgbe_config_msi(pdata); + if (!ret) + goto out; + + pdata->irq_count = 1; + pdata->irq_shared = 1; + + pdata->dev_irq = pdata->pcidev->irq; + pdata->ecc_irq = pdata->pcidev->irq; + pdata->i2c_irq = pdata->pcidev->irq; + pdata->an_irq = pdata->pcidev->irq; + +out: + if (netif_msg_probe(pdata)) { + unsigned int i; + + dev_dbg(pdata->dev, " dev irq=%d\n", pdata->dev_irq); + dev_dbg(pdata->dev, " ecc irq=%d\n", pdata->ecc_irq); + dev_dbg(pdata->dev, " i2c irq=%d\n", pdata->i2c_irq); + dev_dbg(pdata->dev, " an irq=%d\n", pdata->an_irq); + for (i = 0; i < pdata->channel_irq_count; i++) + dev_dbg(pdata->dev, " dma%u irq=%d\n", + i, pdata->channel_irq[i]); + } + + return 0; +} + +static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct xgbe_prv_data *pdata; + struct device *dev = &pdev->dev; + void __iomem * const *iomap_table; + unsigned int ma_lo, ma_hi; + unsigned int reg; + int bar_mask; + int ret; + + pdata = xgbe_alloc_pdata(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto err_alloc; + } + + pdata->pcidev = pdev; + pci_set_drvdata(pdev, pdata); + + /* Get the version data */ + pdata->vdata = (struct xgbe_version_data *)id->driver_data; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "pcim_enable_device failed\n"); + goto err_pci_enable; + } + + /* Obtain the mmio areas for the device */ + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, XGBE_DRV_NAME); + if (ret) { + dev_err(dev, "pcim_iomap_regions failed\n"); + goto err_pci_enable; + } + + iomap_table = pcim_iomap_table(pdev); + if (!iomap_table) { + dev_err(dev, "pcim_iomap_table failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + + pdata->xgmac_regs = iomap_table[XGBE_XGMAC_BAR]; + if (!pdata->xgmac_regs) { + dev_err(dev, "xgmac ioremap failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + pdata->xprop_regs = pdata->xgmac_regs + XGBE_MAC_PROP_OFFSET; + pdata->xi2c_regs = pdata->xgmac_regs + XGBE_I2C_CTRL_OFFSET; + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); + dev_dbg(dev, "xprop_regs = %p\n", pdata->xprop_regs); + dev_dbg(dev, "xi2c_regs = %p\n", pdata->xi2c_regs); + } + + pdata->xpcs_regs = iomap_table[XGBE_XPCS_BAR]; + if (!pdata->xpcs_regs) { + dev_err(dev, "xpcs ioremap failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + + /* Configure the PCS indirect addressing support */ + reg = XPCS32_IOREAD(pdata, PCS_V2_WINDOW_DEF); + pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); + pdata->xpcs_window <<= 6; + pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); + pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7); + pdata->xpcs_window_mask = pdata->xpcs_window_size - 1; + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "xpcs window = %#010x\n", + pdata->xpcs_window); + dev_dbg(dev, "xpcs window size = %#010x\n", + pdata->xpcs_window_size); + dev_dbg(dev, "xpcs window mask = %#010x\n", + pdata->xpcs_window_mask); + } + + pci_set_master(pdev); + + /* Enable all interrupts in the hardware */ + XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + + /* Retrieve the MAC address */ + ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO); + ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI); + pdata->mac_addr[0] = ma_lo & 0xff; + pdata->mac_addr[1] = (ma_lo >> 8) & 0xff; + pdata->mac_addr[2] = (ma_lo >> 16) & 0xff; + pdata->mac_addr[3] = (ma_lo >> 24) & 0xff; + pdata->mac_addr[4] = ma_hi & 0xff; + pdata->mac_addr[5] = (ma_hi >> 8) & 0xff; + if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID) || + !is_valid_ether_addr(pdata->mac_addr)) { + dev_err(dev, "invalid mac address\n"); + ret = -EINVAL; + goto err_pci_enable; + } + + /* Clock settings */ + pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ; + pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ; + + /* Set the DMA coherency values */ + pdata->coherent = 1; + pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; + pdata->arcache = XGBE_DMA_OS_ARCACHE; + pdata->awcache = XGBE_DMA_OS_AWCACHE; + + /* Set the maximum channels and queues */ + reg = XP_IOREAD(pdata, XP_PROP_1); + pdata->tx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_DMA); + pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA); + pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES); + pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES); + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "max tx/rx channel count = %u/%u\n", + pdata->tx_max_channel_count, + pdata->tx_max_channel_count); + dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n", + pdata->tx_max_q_count, pdata->rx_max_q_count); + } + + /* Set the hardware channel and queue counts */ + xgbe_set_counts(pdata); + + /* Set the maximum fifo amounts */ + reg = XP_IOREAD(pdata, XP_PROP_2); + pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE); + pdata->tx_max_fifo_size *= 16384; + pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size, + pdata->vdata->tx_max_fifo_size); + pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE); + pdata->rx_max_fifo_size *= 16384; + pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size, + pdata->vdata->rx_max_fifo_size); + if (netif_msg_probe(pdata)) + dev_dbg(dev, "max tx/rx max fifo size = %u/%u\n", + pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); + + /* Configure interrupt support */ + ret = xgbe_config_irqs(pdata); + if (ret) + goto err_pci_enable; + + /* Configure the netdev resource */ + ret = xgbe_config_netdev(pdata); + if (ret) + goto err_pci_enable; + + netdev_notice(pdata->netdev, "net device enabled\n"); + + return 0; + +err_pci_enable: + xgbe_free_pdata(pdata); + +err_alloc: + dev_notice(dev, "net device not enabled\n"); + + return ret; +} + +static void xgbe_pci_remove(struct pci_dev *pdev) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + + xgbe_deconfig_netdev(pdata); + + xgbe_free_pdata(pdata); +} + +#ifdef CONFIG_PM +static int xgbe_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + if (netif_running(netdev)) + ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + + pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + return ret; +} + +static int xgbe_pci_resume(struct pci_dev *pdev) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + if (netif_running(netdev)) { + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + + /* Schedule a restart in case the link or phy state changed + * while we were powered down. + */ + schedule_work(&pdata->restart_work); + } + + return ret; +} +#endif /* CONFIG_PM */ + +static const struct xgbe_version_data xgbe_v2a = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, + .xpcs_access = XGBE_XPCS_ACCESS_V2, + .mmc_64bit = 1, + .tx_max_fifo_size = 229376, + .rx_max_fifo_size = 229376, + .tx_tstamp_workaround = 1, + .ecc_support = 1, + .i2c_support = 1, +}; + +static const struct xgbe_version_data xgbe_v2b = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, + .xpcs_access = XGBE_XPCS_ACCESS_V2, + .mmc_64bit = 1, + .tx_max_fifo_size = 65536, + .rx_max_fifo_size = 65536, + .tx_tstamp_workaround = 1, + .ecc_support = 1, + .i2c_support = 1, +}; + +static const struct pci_device_id xgbe_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1458), + .driver_data = (kernel_ulong_t)&xgbe_v2a }, + { PCI_VDEVICE(AMD, 0x1459), + .driver_data = (kernel_ulong_t)&xgbe_v2b }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, xgbe_pci_table); + +static struct pci_driver xgbe_driver = { + .name = XGBE_DRV_NAME, + .id_table = xgbe_pci_table, + .probe = xgbe_pci_probe, + .remove = xgbe_pci_remove, +#ifdef CONFIG_PM + .suspend = xgbe_pci_suspend, + .resume = xgbe_pci_resume, +#endif +}; + +int xgbe_pci_init(void) +{ + return pci_register_driver(&xgbe_driver); +} + +void xgbe_pci_exit(void) +{ + pci_unregister_driver(&xgbe_driver); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c new file mode 100644 index 000000000000..c75edcac5e0a --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c @@ -0,0 +1,845 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/device.h> +#include <linux/property.h> +#include <linux/mdio.h> +#include <linux/phy.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_BLWC_PROPERTY "amd,serdes-blwc" +#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" +#define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" +#define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" +#define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" +#define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" + +/* Default SerDes settings */ +#define XGBE_SPEED_1000_BLWC 1 +#define XGBE_SPEED_1000_CDR 0x2 +#define XGBE_SPEED_1000_PLL 0x0 +#define XGBE_SPEED_1000_PQ 0xa +#define XGBE_SPEED_1000_RATE 0x3 +#define XGBE_SPEED_1000_TXAMP 0xf +#define XGBE_SPEED_1000_WORD 0x1 +#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 + +#define XGBE_SPEED_2500_BLWC 1 +#define XGBE_SPEED_2500_CDR 0x2 +#define XGBE_SPEED_2500_PLL 0x0 +#define XGBE_SPEED_2500_PQ 0xa +#define XGBE_SPEED_2500_RATE 0x1 +#define XGBE_SPEED_2500_TXAMP 0xf +#define XGBE_SPEED_2500_WORD 0x1 +#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 + +#define XGBE_SPEED_10000_BLWC 0 +#define XGBE_SPEED_10000_CDR 0x7 +#define XGBE_SPEED_10000_PLL 0x1 +#define XGBE_SPEED_10000_PQ 0x12 +#define XGBE_SPEED_10000_RATE 0x0 +#define XGBE_SPEED_10000_TXAMP 0xa +#define XGBE_SPEED_10000_WORD 0x7 +#define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 +#define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f + +/* Rate-change complete wait/retry count */ +#define XGBE_RATECHANGE_COUNT 500 + +static const u32 xgbe_phy_blwc[] = { + XGBE_SPEED_1000_BLWC, + XGBE_SPEED_2500_BLWC, + XGBE_SPEED_10000_BLWC, +}; + +static const u32 xgbe_phy_cdr_rate[] = { + XGBE_SPEED_1000_CDR, + XGBE_SPEED_2500_CDR, + XGBE_SPEED_10000_CDR, +}; + +static const u32 xgbe_phy_pq_skew[] = { + XGBE_SPEED_1000_PQ, + XGBE_SPEED_2500_PQ, + XGBE_SPEED_10000_PQ, +}; + +static const u32 xgbe_phy_tx_amp[] = { + XGBE_SPEED_1000_TXAMP, + XGBE_SPEED_2500_TXAMP, + XGBE_SPEED_10000_TXAMP, +}; + +static const u32 xgbe_phy_dfe_tap_cfg[] = { + XGBE_SPEED_1000_DFE_TAP_CONFIG, + XGBE_SPEED_2500_DFE_TAP_CONFIG, + XGBE_SPEED_10000_DFE_TAP_CONFIG, +}; + +static const u32 xgbe_phy_dfe_tap_ena[] = { + XGBE_SPEED_1000_DFE_TAP_ENABLE, + XGBE_SPEED_2500_DFE_TAP_ENABLE, + XGBE_SPEED_10000_DFE_TAP_ENABLE, +}; + +struct xgbe_phy_data { + /* 1000/10000 vs 2500/10000 indicator */ + unsigned int speed_set; + + /* SerDes UEFI configurable settings. + * Switching between modes/speeds requires new values for some + * SerDes settings. The values can be supplied as device + * properties in array format. The first array entry is for + * 1GbE, second for 2.5GbE and third for 10GbE + */ + u32 blwc[XGBE_SPEEDS]; + u32 cdr_rate[XGBE_SPEEDS]; + u32 pq_skew[XGBE_SPEEDS]; + u32 tx_amp[XGBE_SPEEDS]; + u32 dfe_tap_cfg[XGBE_SPEEDS]; + u32 dfe_tap_ena[XGBE_SPEEDS]; +}; + +static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) +{ + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1); +} + +static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) +{ + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0); +} + +static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Compare Advertisement and Link Partner register 1 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + if (lp_reg & 0x400) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x800) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x400) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x800) { + if (ad_reg & 0x400) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x400) + pdata->phy.tx_pause = 1; + } + } + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full; + else + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + } + + ad_reg &= lp_reg; + if (ad_reg & 0x80) { + mode = XGBE_MODE_KR; + } else if (ad_reg & 0x20) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } else { + mode = XGBE_MODE_UNKNOWN; + } + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata) +{ + return pdata->phy.advertising; +} + +static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for an configuration */ + return 0; +} + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata) +{ + return XGBE_AN_MODE_CL73; +} + +static void xgbe_phy_pcs_power_cycle(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + + reg |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + usleep_range(75, 100); + + reg &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); +} + +static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata) +{ + /* Assert Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1); +} + +static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) +{ + unsigned int wait; + u16 status; + + /* Release Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0); + + /* Wait for Rx and Tx ready */ + wait = XGBE_RATECHANGE_COUNT; + while (wait--) { + usleep_range(50, 75); + + status = XSIR0_IOREAD(pdata, SIR0_STATUS); + if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && + XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) + goto rx_reset; + } + + netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n", + status); + +rx_reset: + /* Perform Rx reset for the DFE changes */ + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1); +} + +static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KR/10G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBR; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED10G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 10G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_10000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_10000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_10000]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); +} + +static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 2.5G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_2500]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_2500]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_2500]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); +} + +static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 1G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_1000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_1000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_1000]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); +} + +static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= MDIO_PCS_CTRL2_TYPE; + + if (reg == MDIO_PCS_CTRL2_10GBR) { + mode = XGBE_MODE_KR; + } else { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + + /* If we are in KR switch to KX, and vice-versa */ + if (xgbe_phy_cur_mode(pdata) == XGBE_MODE_KR) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } else { + mode = XGBE_MODE_KR; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata, + int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (speed) { + case SPEED_1000: + return (phy_data->speed_set == XGBE_SPEEDSET_1000_10000) + ? XGBE_MODE_KX_1000 : XGBE_MODE_UNKNOWN; + case SPEED_2500: + return (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + ? XGBE_MODE_KX_2500 : XGBE_MODE_UNKNOWN; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_phy_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_phy_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_phy_kr_mode(pdata); + break; + default: + break; + } +} + +static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode, u32 advert) +{ + if (pdata->phy.autoneg == AUTONEG_ENABLE) { + if (pdata->phy.advertising & advert) + return true; + } else { + enum xgbe_mode cur_mode; + + cur_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed); + if (cur_mode == mode) + return true; + } + + return false; +} + +static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseKX_Full); + case XGBE_MODE_KX_2500: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_2500baseX_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseKR_Full); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (speed) { + case SPEED_1000: + if (phy_data->speed_set != XGBE_SPEEDSET_1000_10000) + return false; + return true; + case SPEED_2500: + if (phy_data->speed_set != XGBE_SPEEDSET_2500_10000) + return false; + return true; + case SPEED_10000: + return true; + default: + return false; + } +} + +static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) +{ + unsigned int reg; + + *an_restart = 0; + + /* Link status is latched low, so read once to clear + * and then read again to get current state + */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + + return (reg & MDIO_STAT1_LSTATUS) ? 1 : 0; +} + +static void xgbe_phy_stop(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for stop */ +} + +static int xgbe_phy_start(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for start */ + return 0; +} + +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) +{ + unsigned int reg, count; + + /* Perform a software reset of the PCS */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg |= MDIO_CTRL1_RESET; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + count = 50; + do { + msleep(20); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + } while ((reg & MDIO_CTRL1_RESET) && --count); + + if (reg & MDIO_CTRL1_RESET) + return -ETIMEDOUT; + + return 0; +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for exit */ +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data; + int ret; + + phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + + /* Retrieve the PHY speedset */ + ret = device_property_read_u32(pdata->phy_dev, XGBE_SPEEDSET_PROPERTY, + &phy_data->speed_set); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_SPEEDSET_PROPERTY); + return ret; + } + + switch (phy_data->speed_set) { + case XGBE_SPEEDSET_1000_10000: + case XGBE_SPEEDSET_2500_10000: + break; + default: + dev_err(pdata->dev, "invalid %s property\n", + XGBE_SPEEDSET_PROPERTY); + return -EINVAL; + } + + /* Retrieve the PHY configuration properties */ + if (device_property_present(pdata->phy_dev, XGBE_BLWC_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_BLWC_PROPERTY, + phy_data->blwc, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_BLWC_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->blwc, xgbe_phy_blwc, + sizeof(phy_data->blwc)); + } + + if (device_property_present(pdata->phy_dev, XGBE_CDR_RATE_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_CDR_RATE_PROPERTY, + phy_data->cdr_rate, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_CDR_RATE_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->cdr_rate, xgbe_phy_cdr_rate, + sizeof(phy_data->cdr_rate)); + } + + if (device_property_present(pdata->phy_dev, XGBE_PQ_SKEW_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_PQ_SKEW_PROPERTY, + phy_data->pq_skew, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_PQ_SKEW_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->pq_skew, xgbe_phy_pq_skew, + sizeof(phy_data->pq_skew)); + } + + if (device_property_present(pdata->phy_dev, XGBE_TX_AMP_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_TX_AMP_PROPERTY, + phy_data->tx_amp, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_TX_AMP_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->tx_amp, xgbe_phy_tx_amp, + sizeof(phy_data->tx_amp)); + } + + if (device_property_present(pdata->phy_dev, XGBE_DFE_CFG_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_DFE_CFG_PROPERTY, + phy_data->dfe_tap_cfg, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_DFE_CFG_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->dfe_tap_cfg, xgbe_phy_dfe_tap_cfg, + sizeof(phy_data->dfe_tap_cfg)); + } + + if (device_property_present(pdata->phy_dev, XGBE_DFE_ENA_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_DFE_ENA_PROPERTY, + phy_data->dfe_tap_ena, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_DFE_ENA_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->dfe_tap_ena, xgbe_phy_dfe_tap_ena, + sizeof(phy_data->dfe_tap_ena)); + } + + /* Initialize supported features */ + pdata->phy.supported = SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + pdata->phy.supported |= SUPPORTED_10000baseKR_Full; + switch (phy_data->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.supported |= SUPPORTED_1000baseKX_Full; + break; + case XGBE_SPEEDSET_2500_10000: + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + break; + } + + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + + pdata->phy_data = phy_data; + + return 0; +} + +void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *phy_if) +{ + struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl; + + phy_impl->init = xgbe_phy_init; + phy_impl->exit = xgbe_phy_exit; + + phy_impl->reset = xgbe_phy_reset; + phy_impl->start = xgbe_phy_start; + phy_impl->stop = xgbe_phy_stop; + + phy_impl->link_status = xgbe_phy_link_status; + + phy_impl->valid_speed = xgbe_phy_valid_speed; + + phy_impl->use_mode = xgbe_phy_use_mode; + phy_impl->set_mode = xgbe_phy_set_mode; + phy_impl->get_mode = xgbe_phy_get_mode; + phy_impl->switch_mode = xgbe_phy_switch_mode; + phy_impl->cur_mode = xgbe_phy_cur_mode; + + phy_impl->an_mode = xgbe_phy_an_mode; + + phy_impl->an_config = xgbe_phy_an_config; + + phy_impl->an_advertising = xgbe_phy_an_advertising; + + phy_impl->an_outcome = xgbe_phy_an_outcome; + + phy_impl->kr_training_pre = xgbe_phy_kr_training_pre; + phy_impl->kr_training_post = xgbe_phy_kr_training_post; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c new file mode 100644 index 000000000000..4ba43328d99e --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -0,0 +1,3083 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kmod.h> +#include <linux/mdio.h> +#include <linux/phy.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_PHY_PORT_SPEED_100 BIT(0) +#define XGBE_PHY_PORT_SPEED_1000 BIT(1) +#define XGBE_PHY_PORT_SPEED_2500 BIT(2) +#define XGBE_PHY_PORT_SPEED_10000 BIT(3) + +#define XGBE_MUTEX_RELEASE 0x80000000 + +#define XGBE_SFP_DIRECT 7 + +/* I2C target addresses */ +#define XGBE_SFP_SERIAL_ID_ADDRESS 0x50 +#define XGBE_SFP_DIAG_INFO_ADDRESS 0x51 +#define XGBE_SFP_PHY_ADDRESS 0x56 +#define XGBE_GPIO_ADDRESS_PCA9555 0x20 + +/* SFP sideband signal indicators */ +#define XGBE_GPIO_NO_TX_FAULT BIT(0) +#define XGBE_GPIO_NO_RATE_SELECT BIT(1) +#define XGBE_GPIO_NO_MOD_ABSENT BIT(2) +#define XGBE_GPIO_NO_RX_LOS BIT(3) + +/* Rate-change complete wait/retry count */ +#define XGBE_RATECHANGE_COUNT 500 + +enum xgbe_port_mode { + XGBE_PORT_MODE_RSVD = 0, + XGBE_PORT_MODE_BACKPLANE, + XGBE_PORT_MODE_BACKPLANE_2500, + XGBE_PORT_MODE_1000BASE_T, + XGBE_PORT_MODE_1000BASE_X, + XGBE_PORT_MODE_NBASE_T, + XGBE_PORT_MODE_10GBASE_T, + XGBE_PORT_MODE_10GBASE_R, + XGBE_PORT_MODE_SFP, + XGBE_PORT_MODE_MAX, +}; + +enum xgbe_conn_type { + XGBE_CONN_TYPE_NONE = 0, + XGBE_CONN_TYPE_SFP, + XGBE_CONN_TYPE_MDIO, + XGBE_CONN_TYPE_BACKPLANE, + XGBE_CONN_TYPE_MAX, +}; + +/* SFP/SFP+ related definitions */ +enum xgbe_sfp_comm { + XGBE_SFP_COMM_DIRECT = 0, + XGBE_SFP_COMM_PCA9545, +}; + +enum xgbe_sfp_cable { + XGBE_SFP_CABLE_UNKNOWN = 0, + XGBE_SFP_CABLE_ACTIVE, + XGBE_SFP_CABLE_PASSIVE, +}; + +enum xgbe_sfp_base { + XGBE_SFP_BASE_UNKNOWN = 0, + XGBE_SFP_BASE_1000_T, + XGBE_SFP_BASE_1000_SX, + XGBE_SFP_BASE_1000_LX, + XGBE_SFP_BASE_1000_CX, + XGBE_SFP_BASE_10000_SR, + XGBE_SFP_BASE_10000_LR, + XGBE_SFP_BASE_10000_LRM, + XGBE_SFP_BASE_10000_ER, + XGBE_SFP_BASE_10000_CR, +}; + +enum xgbe_sfp_speed { + XGBE_SFP_SPEED_UNKNOWN = 0, + XGBE_SFP_SPEED_100_1000, + XGBE_SFP_SPEED_1000, + XGBE_SFP_SPEED_10000, +}; + +/* SFP Serial ID Base ID values relative to an offset of 0 */ +#define XGBE_SFP_BASE_ID 0 +#define XGBE_SFP_ID_SFP 0x03 + +#define XGBE_SFP_BASE_EXT_ID 1 +#define XGBE_SFP_EXT_ID_SFP 0x04 + +#define XGBE_SFP_BASE_10GBE_CC 3 +#define XGBE_SFP_BASE_10GBE_CC_SR BIT(4) +#define XGBE_SFP_BASE_10GBE_CC_LR BIT(5) +#define XGBE_SFP_BASE_10GBE_CC_LRM BIT(6) +#define XGBE_SFP_BASE_10GBE_CC_ER BIT(7) + +#define XGBE_SFP_BASE_1GBE_CC 6 +#define XGBE_SFP_BASE_1GBE_CC_SX BIT(0) +#define XGBE_SFP_BASE_1GBE_CC_LX BIT(1) +#define XGBE_SFP_BASE_1GBE_CC_CX BIT(2) +#define XGBE_SFP_BASE_1GBE_CC_T BIT(3) + +#define XGBE_SFP_BASE_CABLE 8 +#define XGBE_SFP_BASE_CABLE_PASSIVE BIT(2) +#define XGBE_SFP_BASE_CABLE_ACTIVE BIT(3) + +#define XGBE_SFP_BASE_BR 12 +#define XGBE_SFP_BASE_BR_1GBE_MIN 0x0a +#define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d +#define XGBE_SFP_BASE_BR_10GBE_MIN 0x64 +#define XGBE_SFP_BASE_BR_10GBE_MAX 0x68 + +#define XGBE_SFP_BASE_CU_CABLE_LEN 18 + +#define XGBE_SFP_BASE_VENDOR_NAME 20 +#define XGBE_SFP_BASE_VENDOR_NAME_LEN 16 +#define XGBE_SFP_BASE_VENDOR_PN 40 +#define XGBE_SFP_BASE_VENDOR_PN_LEN 16 +#define XGBE_SFP_BASE_VENDOR_REV 56 +#define XGBE_SFP_BASE_VENDOR_REV_LEN 4 + +#define XGBE_SFP_BASE_CC 63 + +/* SFP Serial ID Extended ID values relative to an offset of 64 */ +#define XGBE_SFP_BASE_VENDOR_SN 4 +#define XGBE_SFP_BASE_VENDOR_SN_LEN 16 + +#define XGBE_SFP_EXTD_DIAG 28 +#define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) + +#define XGBE_SFP_EXTD_SFF_8472 30 + +#define XGBE_SFP_EXTD_CC 31 + +struct xgbe_sfp_eeprom { + u8 base[64]; + u8 extd[32]; + u8 vendor[32]; +}; + +#define XGBE_BEL_FUSE_VENDOR "BEL-FUSE " +#define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 " + +struct xgbe_sfp_ascii { + union { + char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1]; + char partno[XGBE_SFP_BASE_VENDOR_PN_LEN + 1]; + char rev[XGBE_SFP_BASE_VENDOR_REV_LEN + 1]; + char serno[XGBE_SFP_BASE_VENDOR_SN_LEN + 1]; + } u; +}; + +/* MDIO PHY reset types */ +enum xgbe_mdio_reset { + XGBE_MDIO_RESET_NONE = 0, + XGBE_MDIO_RESET_I2C_GPIO, + XGBE_MDIO_RESET_INT_GPIO, + XGBE_MDIO_RESET_MAX, +}; + +/* Re-driver related definitions */ +enum xgbe_phy_redrv_if { + XGBE_PHY_REDRV_IF_MDIO = 0, + XGBE_PHY_REDRV_IF_I2C, + XGBE_PHY_REDRV_IF_MAX, +}; + +enum xgbe_phy_redrv_model { + XGBE_PHY_REDRV_MODEL_4223 = 0, + XGBE_PHY_REDRV_MODEL_4227, + XGBE_PHY_REDRV_MODEL_MAX, +}; + +enum xgbe_phy_redrv_mode { + XGBE_PHY_REDRV_MODE_CX = 5, + XGBE_PHY_REDRV_MODE_SR = 9, +}; + +#define XGBE_PHY_REDRV_MODE_REG 0x12b0 + +/* PHY related configuration information */ +struct xgbe_phy_data { + enum xgbe_port_mode port_mode; + + unsigned int port_id; + + unsigned int port_speeds; + + enum xgbe_conn_type conn_type; + + enum xgbe_mode cur_mode; + enum xgbe_mode start_mode; + + unsigned int rrc_count; + + unsigned int mdio_addr; + + unsigned int comm_owned; + + /* SFP Support */ + enum xgbe_sfp_comm sfp_comm; + unsigned int sfp_mux_address; + unsigned int sfp_mux_channel; + + unsigned int sfp_gpio_address; + unsigned int sfp_gpio_mask; + unsigned int sfp_gpio_rx_los; + unsigned int sfp_gpio_tx_fault; + unsigned int sfp_gpio_mod_absent; + unsigned int sfp_gpio_rate_select; + + unsigned int sfp_rx_los; + unsigned int sfp_tx_fault; + unsigned int sfp_mod_absent; + unsigned int sfp_diags; + unsigned int sfp_changed; + unsigned int sfp_phy_avail; + unsigned int sfp_cable_len; + enum xgbe_sfp_base sfp_base; + enum xgbe_sfp_cable sfp_cable; + enum xgbe_sfp_speed sfp_speed; + struct xgbe_sfp_eeprom sfp_eeprom; + + /* External PHY support */ + enum xgbe_mdio_mode phydev_mode; + struct mii_bus *mii; + struct phy_device *phydev; + enum xgbe_mdio_reset mdio_reset; + unsigned int mdio_reset_addr; + unsigned int mdio_reset_gpio; + + /* Re-driver support */ + unsigned int redrv; + unsigned int redrv_if; + unsigned int redrv_addr; + unsigned int redrv_lane; + unsigned int redrv_model; +}; + +/* I2C, MDIO and GPIO lines are muxed, so only one device at a time */ +static DEFINE_MUTEX(xgbe_phy_comm_lock); + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata); + +static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata, + struct xgbe_i2c_op *i2c_op) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* Be sure we own the bus */ + if (WARN_ON(!phy_data->comm_owned)) + return -EIO; + + return pdata->i2c_if.i2c_xfer(pdata, i2c_op); +} + +static int xgbe_phy_redrv_write(struct xgbe_prv_data *pdata, unsigned int reg, + unsigned int val) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + __be16 *redrv_val; + u8 redrv_data[5], csum; + unsigned int i, retry; + int ret; + + /* High byte of register contains read/write indicator */ + redrv_data[0] = ((reg >> 8) & 0xff) << 1; + redrv_data[1] = reg & 0xff; + redrv_val = (__be16 *)&redrv_data[2]; + *redrv_val = cpu_to_be16(val); + + /* Calculate 1 byte checksum */ + csum = 0; + for (i = 0; i < 4; i++) { + csum += redrv_data[i]; + if (redrv_data[i] > csum) + csum++; + } + redrv_data[4] = ~csum; + + retry = 1; +again1: + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->redrv_addr; + i2c_op.len = sizeof(redrv_data); + i2c_op.buf = redrv_data; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again1; + + return ret; + } + + retry = 1; +again2: + i2c_op.cmd = XGBE_I2C_CMD_READ; + i2c_op.target = phy_data->redrv_addr; + i2c_op.len = 1; + i2c_op.buf = redrv_data; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again2; + + return ret; + } + + if (redrv_data[0] != 0xff) { + netif_dbg(pdata, drv, pdata->netdev, + "Redriver write checksum error\n"); + ret = -EIO; + } + + return ret; +} + +static int xgbe_phy_i2c_write(struct xgbe_prv_data *pdata, unsigned int target, + void *val, unsigned int val_len) +{ + struct xgbe_i2c_op i2c_op; + int retry, ret; + + retry = 1; +again: + /* Write the specfied register */ + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = target; + i2c_op.len = val_len; + i2c_op.buf = val; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if ((ret == -EAGAIN) && retry--) + goto again; + + return ret; +} + +static int xgbe_phy_i2c_read(struct xgbe_prv_data *pdata, unsigned int target, + void *reg, unsigned int reg_len, + void *val, unsigned int val_len) +{ + struct xgbe_i2c_op i2c_op; + int retry, ret; + + retry = 1; +again1: + /* Set the specified register to read */ + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = target; + i2c_op.len = reg_len; + i2c_op.buf = reg; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again1; + + return ret; + } + + retry = 1; +again2: + /* Read the specfied register */ + i2c_op.cmd = XGBE_I2C_CMD_READ; + i2c_op.target = target; + i2c_op.len = val_len; + i2c_op.buf = val; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if ((ret == -EAGAIN) && retry--) + goto again2; + + return ret; +} + +static int xgbe_phy_sfp_put_mux(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + u8 mux_channel; + + if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) + return 0; + + /* Select no mux channels */ + mux_channel = 0; + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->sfp_mux_address; + i2c_op.len = sizeof(mux_channel); + i2c_op.buf = &mux_channel; + + return xgbe_phy_i2c_xfer(pdata, &i2c_op); +} + +static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + u8 mux_channel; + + if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) + return 0; + + /* Select desired mux channel */ + mux_channel = 1 << phy_data->sfp_mux_channel; + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->sfp_mux_address; + i2c_op.len = sizeof(mux_channel); + i2c_op.buf = &mux_channel; + + return xgbe_phy_i2c_xfer(pdata, &i2c_op); +} + +static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + phy_data->comm_owned = 0; + + mutex_unlock(&xgbe_phy_comm_lock); +} + +static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned long timeout; + unsigned int mutex_id; + + if (phy_data->comm_owned) + return 0; + + /* The I2C and MDIO/GPIO bus is multiplexed between multiple devices, + * the driver needs to take the software mutex and then the hardware + * mutexes before being able to use the busses. + */ + mutex_lock(&xgbe_phy_comm_lock); + + /* Clear the mutexes */ + XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE); + XP_IOWRITE(pdata, XP_MDIO_MUTEX, XGBE_MUTEX_RELEASE); + + /* Mutex formats are the same for I2C and MDIO/GPIO */ + mutex_id = 0; + XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ID, phy_data->port_id); + XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ACTIVE, 1); + + timeout = jiffies + (5 * HZ); + while (time_before(jiffies, timeout)) { + /* Must be all zeroes in order to obtain the mutex */ + if (XP_IOREAD(pdata, XP_I2C_MUTEX) || + XP_IOREAD(pdata, XP_MDIO_MUTEX)) { + usleep_range(100, 200); + continue; + } + + /* Obtain the mutex */ + XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id); + XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id); + + phy_data->comm_owned = 1; + return 0; + } + + mutex_unlock(&xgbe_phy_comm_lock); + + netdev_err(pdata->netdev, "unable to obtain hardware mutexes\n"); + + return -ETIMEDOUT; +} + +static int xgbe_phy_mdio_mii_write(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (reg & MII_ADDR_C45) { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) + return -ENOTSUPP; + } else { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) + return -ENOTSUPP; + } + + return pdata->hw_if.write_ext_mii_regs(pdata, addr, reg, val); +} + +static int xgbe_phy_i2c_mii_write(struct xgbe_prv_data *pdata, int reg, u16 val) +{ + __be16 *mii_val; + u8 mii_data[3]; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) + return ret; + + mii_data[0] = reg & 0xff; + mii_val = (__be16 *)&mii_data[1]; + *mii_val = cpu_to_be16(val); + + ret = xgbe_phy_i2c_write(pdata, XGBE_SFP_PHY_ADDRESS, + mii_data, sizeof(mii_data)); + + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static int xgbe_phy_mii_write(struct mii_bus *mii, int addr, int reg, u16 val) +{ + struct xgbe_prv_data *pdata = mii->priv; + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + ret = xgbe_phy_i2c_mii_write(pdata, reg, val); + else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) + ret = xgbe_phy_mdio_mii_write(pdata, addr, reg, val); + else + ret = -ENOTSUPP; + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static int xgbe_phy_mdio_mii_read(struct xgbe_prv_data *pdata, int addr, + int reg) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (reg & MII_ADDR_C45) { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) + return -ENOTSUPP; + } else { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) + return -ENOTSUPP; + } + + return pdata->hw_if.read_ext_mii_regs(pdata, addr, reg); +} + +static int xgbe_phy_i2c_mii_read(struct xgbe_prv_data *pdata, int reg) +{ + __be16 mii_val; + u8 mii_reg; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) + return ret; + + mii_reg = reg; + ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_PHY_ADDRESS, + &mii_reg, sizeof(mii_reg), + &mii_val, sizeof(mii_val)); + if (!ret) + ret = be16_to_cpu(mii_val); + + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static int xgbe_phy_mii_read(struct mii_bus *mii, int addr, int reg) +{ + struct xgbe_prv_data *pdata = mii->priv; + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + ret = xgbe_phy_i2c_mii_read(pdata, reg); + else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) + ret = xgbe_phy_mdio_mii_read(pdata, addr, reg); + else + ret = -ENOTSUPP; + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (phy_data->sfp_mod_absent) { + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.advertising = pdata->phy.supported; + } + + pdata->phy.advertising &= ~ADVERTISED_Autoneg; + pdata->phy.advertising &= ~ADVERTISED_TP; + pdata->phy.advertising &= ~ADVERTISED_FIBRE; + pdata->phy.advertising &= ~ADVERTISED_100baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC; + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.advertising |= ADVERTISED_Autoneg; + break; + case XGBE_SFP_BASE_10000_SR: + case XGBE_SFP_BASE_10000_LR: + case XGBE_SFP_BASE_10000_LRM: + case XGBE_SFP_BASE_10000_ER: + case XGBE_SFP_BASE_10000_CR: + default: + pdata->phy.speed = SPEED_10000; + pdata->phy.duplex = DUPLEX_FULL; + pdata->phy.autoneg = AUTONEG_DISABLE; + break; + } + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_CX: + case XGBE_SFP_BASE_10000_CR: + pdata->phy.advertising |= ADVERTISED_TP; + break; + default: + pdata->phy.advertising |= ADVERTISED_FIBRE; + } + + switch (phy_data->sfp_speed) { + case XGBE_SFP_SPEED_100_1000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) + pdata->phy.advertising |= ADVERTISED_100baseT_Full; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + break; + case XGBE_SFP_SPEED_1000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + break; + case XGBE_SFP_SPEED_10000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + break; + default: + /* Choose the fastest supported speed */ + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) + pdata->phy.advertising |= ADVERTISED_100baseT_Full; + } +} + +static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, + enum xgbe_sfp_speed sfp_speed) +{ + u8 *sfp_base, min, max; + + sfp_base = sfp_eeprom->base; + + switch (sfp_speed) { + case XGBE_SFP_SPEED_1000: + min = XGBE_SFP_BASE_BR_1GBE_MIN; + max = XGBE_SFP_BASE_BR_1GBE_MAX; + break; + case XGBE_SFP_SPEED_10000: + min = XGBE_SFP_BASE_BR_10GBE_MIN; + max = XGBE_SFP_BASE_BR_10GBE_MAX; + break; + default: + return false; + } + + return ((sfp_base[XGBE_SFP_BASE_BR] >= min) && + (sfp_base[XGBE_SFP_BASE_BR] <= max)); +} + +static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (phy_data->phydev) { + phy_detach(phy_data->phydev); + phy_device_remove(phy_data->phydev); + phy_device_free(phy_data->phydev); + phy_data->phydev = NULL; + } +} + +static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int phy_id = phy_data->phydev->phy_id; + + if ((phy_id & 0xfffffff0) != 0x01ff0cc0) + return false; + + /* Enable Base-T AN */ + phy_write(phy_data->phydev, 0x16, 0x0001); + phy_write(phy_data->phydev, 0x00, 0x9140); + phy_write(phy_data->phydev, 0x16, 0x0000); + + /* Enable SGMII at 100Base-T/1000Base-T Full Duplex */ + phy_write(phy_data->phydev, 0x1b, 0x9084); + phy_write(phy_data->phydev, 0x09, 0x0e00); + phy_write(phy_data->phydev, 0x00, 0x8140); + phy_write(phy_data->phydev, 0x04, 0x0d01); + phy_write(phy_data->phydev, 0x00, 0x9140); + + phy_data->phydev->supported = PHY_GBIT_FEATURES; + phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + phy_data->phydev->advertising = phy_data->phydev->supported; + + netif_dbg(pdata, drv, pdata->netdev, + "Finisar PHY quirk in place\n"); + + return true; +} + +static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata) +{ + if (xgbe_phy_finisar_phy_quirks(pdata)) + return; +} + +static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct phy_device *phydev; + int ret; + + /* If we already have a PHY, just return */ + if (phy_data->phydev) + return 0; + + /* Check for the use of an external PHY */ + if (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE) + return 0; + + /* For SFP, only use an external PHY if available */ + if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && + !phy_data->sfp_phy_avail) + return 0; + + /* Create and connect to the PHY device */ + phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, + (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45)); + if (IS_ERR(phydev)) { + netdev_err(pdata->netdev, "get_phy_device failed\n"); + return -ENODEV; + } + netif_dbg(pdata, drv, pdata->netdev, "external PHY id is %#010x\n", + phydev->phy_id); + + /*TODO: If c45, add request_module based on one of the MMD ids? */ + + ret = phy_device_register(phydev); + if (ret) { + netdev_err(pdata->netdev, "phy_device_register failed\n"); + phy_device_free(phydev); + return ret; + } + + ret = phy_attach_direct(pdata->netdev, phydev, phydev->dev_flags, + PHY_INTERFACE_MODE_SGMII); + if (ret) { + netdev_err(pdata->netdev, "phy_attach_direct failed\n"); + phy_device_remove(phydev); + phy_device_free(phydev); + return ret; + } + phy_data->phydev = phydev; + + xgbe_phy_external_phy_quirks(pdata); + phydev->advertising &= pdata->phy.advertising; + + phy_start_aneg(phy_data->phydev); + + return 0; +} + +static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + if (!phy_data->sfp_changed) + return; + + phy_data->sfp_phy_avail = 0; + + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return; + + /* Check access to the PHY by reading CTRL1 */ + ret = xgbe_phy_i2c_mii_read(pdata, MII_BMCR); + if (ret < 0) + return; + + /* Successfully accessed the PHY */ + phy_data->sfp_phy_avail = 1; +} + +static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; + + if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], + XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN)) + return false; + + if (!memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], + XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN)) { + phy_data->sfp_base = XGBE_SFP_BASE_1000_SX; + phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; + phy_data->sfp_speed = XGBE_SFP_SPEED_1000; + if (phy_data->sfp_changed) + netif_dbg(pdata, drv, pdata->netdev, + "Bel-Fuse SFP quirk in place\n"); + return true; + } + + return false; +} + +static bool xgbe_phy_sfp_parse_quirks(struct xgbe_prv_data *pdata) +{ + if (xgbe_phy_belfuse_parse_quirks(pdata)) + return true; + + return false; +} + +static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; + u8 *sfp_base; + + sfp_base = sfp_eeprom->base; + + if (sfp_base[XGBE_SFP_BASE_ID] != XGBE_SFP_ID_SFP) + return; + + if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) + return; + + if (xgbe_phy_sfp_parse_quirks(pdata)) + return; + + /* Assume ACTIVE cable unless told it is PASSIVE */ + if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) { + phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE; + phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN]; + } else { + phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; + } + + /* Determine the type of SFP */ + if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_SR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_LR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LRM) + phy_data->sfp_base = XGBE_SFP_BASE_10000_LRM; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_ER) + phy_data->sfp_base = XGBE_SFP_BASE_10000_ER; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_SX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_SX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_LX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_LX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_CX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_CX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T) + phy_data->sfp_base = XGBE_SFP_BASE_1000_T; + else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) && + xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) + phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + phy_data->sfp_speed = XGBE_SFP_SPEED_100_1000; + break; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + phy_data->sfp_speed = XGBE_SFP_SPEED_1000; + break; + case XGBE_SFP_BASE_10000_SR: + case XGBE_SFP_BASE_10000_LR: + case XGBE_SFP_BASE_10000_LRM: + case XGBE_SFP_BASE_10000_ER: + case XGBE_SFP_BASE_10000_CR: + phy_data->sfp_speed = XGBE_SFP_SPEED_10000; + break; + default: + break; + } +} + +static void xgbe_phy_sfp_eeprom_info(struct xgbe_prv_data *pdata, + struct xgbe_sfp_eeprom *sfp_eeprom) +{ + struct xgbe_sfp_ascii sfp_ascii; + char *sfp_data = (char *)&sfp_ascii; + + netif_dbg(pdata, drv, pdata->netdev, "SFP detected:\n"); + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], + XGBE_SFP_BASE_VENDOR_NAME_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_NAME_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " vendor: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], + XGBE_SFP_BASE_VENDOR_PN_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_PN_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " part number: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_REV], + XGBE_SFP_BASE_VENDOR_REV_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_REV_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " revision level: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->extd[XGBE_SFP_BASE_VENDOR_SN], + XGBE_SFP_BASE_VENDOR_SN_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_SN_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " serial number: %s\n", + sfp_data); +} + +static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len) +{ + u8 cc; + + for (cc = 0; len; buf++, len--) + cc += *buf; + + return (cc == cc_in) ? true : false; +} + +static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom sfp_eeprom; + u8 eeprom_addr; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) { + netdev_err(pdata->netdev, "I2C error setting SFP MUX\n"); + return ret; + } + + /* Read the SFP serial ID eeprom */ + eeprom_addr = 0; + ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS, + &eeprom_addr, sizeof(eeprom_addr), + &sfp_eeprom, sizeof(sfp_eeprom)); + if (ret) { + netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n"); + goto put; + } + + /* Validate the contents read */ + if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.base[XGBE_SFP_BASE_CC], + sfp_eeprom.base, + sizeof(sfp_eeprom.base) - 1)) { + ret = -EINVAL; + goto put; + } + + if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.extd[XGBE_SFP_EXTD_CC], + sfp_eeprom.extd, + sizeof(sfp_eeprom.extd) - 1)) { + ret = -EINVAL; + goto put; + } + + /* Check for an added or changed SFP */ + if (memcmp(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom))) { + phy_data->sfp_changed = 1; + + if (netif_msg_drv(pdata)) + xgbe_phy_sfp_eeprom_info(pdata, &sfp_eeprom); + + memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom)); + + if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) { + u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG]; + + if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE)) + phy_data->sfp_diags = 1; + } + + xgbe_phy_free_phy_device(pdata); + } else { + phy_data->sfp_changed = 0; + } + +put: + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int gpio_input; + u8 gpio_reg, gpio_ports[2]; + int ret; + + /* Read the input port registers */ + gpio_reg = 0; + ret = xgbe_phy_i2c_read(pdata, phy_data->sfp_gpio_address, + &gpio_reg, sizeof(gpio_reg), + gpio_ports, sizeof(gpio_ports)); + if (ret) { + netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n"); + return; + } + + gpio_input = (gpio_ports[1] << 8) | gpio_ports[0]; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) { + /* No GPIO, just assume the module is present for now */ + phy_data->sfp_mod_absent = 0; + } else { + if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent))) + phy_data->sfp_mod_absent = 0; + } + + if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) && + (gpio_input & (1 << phy_data->sfp_gpio_rx_los))) + phy_data->sfp_rx_los = 1; + + if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) && + (gpio_input & (1 << phy_data->sfp_gpio_tx_fault))) + phy_data->sfp_tx_fault = 1; +} + +static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + xgbe_phy_free_phy_device(pdata); + + phy_data->sfp_mod_absent = 1; + phy_data->sfp_phy_avail = 0; + memset(&phy_data->sfp_eeprom, 0, sizeof(phy_data->sfp_eeprom)); +} + +static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data) +{ + phy_data->sfp_rx_los = 0; + phy_data->sfp_tx_fault = 0; + phy_data->sfp_mod_absent = 1; + phy_data->sfp_diags = 0; + phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN; + phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN; + phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN; +} + +static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + /* Reset the SFP signals and info */ + xgbe_phy_sfp_reset(phy_data); + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return; + + /* Read the SFP signals and check for module presence */ + xgbe_phy_sfp_signals(pdata); + if (phy_data->sfp_mod_absent) { + xgbe_phy_sfp_mod_absent(pdata); + goto put; + } + + ret = xgbe_phy_sfp_read_eeprom(pdata); + if (ret) { + /* Treat any error as if there isn't an SFP plugged in */ + xgbe_phy_sfp_reset(phy_data); + xgbe_phy_sfp_mod_absent(pdata); + goto put; + } + + xgbe_phy_sfp_parse_eeprom(pdata); + + xgbe_phy_sfp_external_phy(pdata); + +put: + xgbe_phy_sfp_phy_settings(pdata); + + xgbe_phy_put_comm_ownership(pdata); +} + +static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u16 lcl_adv = 0, rmt_adv = 0; + u8 fc; + + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (!phy_data->phydev) + return; + + if (phy_data->phydev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (phy_data->phydev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + if (phy_data->phydev->pause) { + pdata->phy.lp_advertising |= ADVERTISED_Pause; + rmt_adv |= LPA_PAUSE_CAP; + } + if (phy_data->phydev->asym_pause) { + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + rmt_adv |= LPA_PAUSE_ASYM; + } + + fc = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + if (fc & FLOW_CTRL_TX) + pdata->phy.tx_pause = 1; + if (fc & FLOW_CTRL_RX) + pdata->phy.rx_pause = 1; +} + +static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_TP; + + /* Use external PHY to determine flow control */ + if (pdata->phy.pause_autoneg) + xgbe_phy_phydev_flowctrl(pdata); + + switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) { + case XGBE_SGMII_AN_LINK_SPEED_100: + if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { + pdata->phy.lp_advertising |= ADVERTISED_100baseT_Full; + mode = XGBE_MODE_SGMII_100; + } else { + /* Half-duplex not supported */ + pdata->phy.lp_advertising |= ADVERTISED_100baseT_Half; + mode = XGBE_MODE_UNKNOWN; + } + break; + case XGBE_SGMII_AN_LINK_SPEED_1000: + if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full; + mode = XGBE_MODE_SGMII_1000; + } else { + /* Half-duplex not supported */ + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half; + mode = XGBE_MODE_UNKNOWN; + } + break; + default: + mode = XGBE_MODE_UNKNOWN; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_FIBRE; + + /* Compare Advertisement and Link Partner register */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY); + if (lp_reg & 0x100) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x100) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x80) { + if (ad_reg & 0x100) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x100) + pdata->phy.tx_pause = 1; + } + } + + if (lp_reg & 0x40) + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half; + if (lp_reg & 0x20) + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full; + + /* Half duplex is not supported */ + ad_reg &= lp_reg; + mode = (ad_reg & 0x20) ? XGBE_MODE_X : XGBE_MODE_UNKNOWN; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Use external PHY to determine flow control */ + if (pdata->phy.pause_autoneg) + xgbe_phy_phydev_flowctrl(pdata); + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + + ad_reg &= lp_reg; + if (ad_reg & 0x80) { + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + mode = XGBE_MODE_KR; + break; + default: + mode = XGBE_MODE_SFI; + break; + } + } else if (ad_reg & 0x20) { + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + mode = XGBE_MODE_KX_1000; + break; + case XGBE_PORT_MODE_1000BASE_X: + mode = XGBE_MODE_X; + break; + case XGBE_PORT_MODE_SFP: + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) + mode = XGBE_MODE_SGMII_100; + else + mode = XGBE_MODE_SGMII_1000; + break; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + default: + mode = XGBE_MODE_X; + break; + } + break; + default: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) + mode = XGBE_MODE_SGMII_100; + else + mode = XGBE_MODE_SGMII_1000; + break; + } + } else { + mode = XGBE_MODE_UNKNOWN; + } + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Compare Advertisement and Link Partner register 1 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + if (lp_reg & 0x400) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x800) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x400) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x800) { + if (ad_reg & 0x400) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x400) + pdata->phy.tx_pause = 1; + } + } + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + + ad_reg &= lp_reg; + if (ad_reg & 0x80) + mode = XGBE_MODE_KR; + else if (ad_reg & 0x20) + mode = XGBE_MODE_KX_1000; + else + mode = XGBE_MODE_UNKNOWN; + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + return xgbe_phy_an73_outcome(pdata); + case XGBE_AN_MODE_CL73_REDRV: + return xgbe_phy_an73_redrv_outcome(pdata); + case XGBE_AN_MODE_CL37: + return xgbe_phy_an37_outcome(pdata); + case XGBE_AN_MODE_CL37_SGMII: + return xgbe_phy_an37_sgmii_outcome(pdata); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int advertising; + + /* Without a re-driver, just return current advertising */ + if (!phy_data->redrv) + return pdata->phy.advertising; + + /* With the KR re-driver we need to advertise a single speed */ + advertising = pdata->phy.advertising; + advertising &= ~ADVERTISED_1000baseKX_Full; + advertising &= ~ADVERTISED_10000baseKR_Full; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + advertising |= ADVERTISED_10000baseKR_Full; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_NBASE_T: + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_10GBASE_T: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_10000)) + advertising |= ADVERTISED_10000baseKR_Full; + else + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_10GBASE_R: + advertising |= ADVERTISED_10000baseKR_Full; + break; + case XGBE_PORT_MODE_SFP: + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + advertising |= ADVERTISED_1000baseKX_Full; + break; + default: + advertising |= ADVERTISED_10000baseKR_Full; + break; + } + break; + default: + advertising |= ADVERTISED_10000baseKR_Full; + break; + } + + return advertising; +} + +static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_find_phy_device(pdata); + if (ret) + return ret; + + if (!phy_data->phydev) + return 0; + + phy_data->phydev->autoneg = pdata->phy.autoneg; + phy_data->phydev->advertising = phy_data->phydev->supported & + pdata->phy.advertising; + + if (pdata->phy.autoneg != AUTONEG_ENABLE) { + phy_data->phydev->speed = pdata->phy.speed; + phy_data->phydev->duplex = pdata->phy.duplex; + } + + ret = phy_start_aneg(phy_data->phydev); + + return ret; +} + +static enum xgbe_an_mode xgbe_phy_an_sfp_mode(struct xgbe_phy_data *phy_data) +{ + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + return XGBE_AN_MODE_CL37; + default: + return XGBE_AN_MODE_NONE; + } +} + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* A KR re-driver will always require CL73 AN */ + if (phy_data->redrv) + return XGBE_AN_MODE_CL73_REDRV; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return XGBE_AN_MODE_CL73; + case XGBE_PORT_MODE_BACKPLANE_2500: + return XGBE_AN_MODE_NONE; + case XGBE_PORT_MODE_1000BASE_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_PORT_MODE_1000BASE_X: + return XGBE_AN_MODE_CL37; + case XGBE_PORT_MODE_NBASE_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_PORT_MODE_10GBASE_T: + return XGBE_AN_MODE_CL73; + case XGBE_PORT_MODE_10GBASE_R: + return XGBE_AN_MODE_NONE; + case XGBE_PORT_MODE_SFP: + return xgbe_phy_an_sfp_mode(phy_data); + default: + return XGBE_AN_MODE_NONE; + } +} + +static int xgbe_phy_set_redrv_mode_mdio(struct xgbe_prv_data *pdata, + enum xgbe_phy_redrv_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u16 redrv_reg, redrv_val; + + redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); + redrv_val = (u16)mode; + + return pdata->hw_if.write_ext_mii_regs(pdata, phy_data->redrv_addr, + redrv_reg, redrv_val); +} + +static int xgbe_phy_set_redrv_mode_i2c(struct xgbe_prv_data *pdata, + enum xgbe_phy_redrv_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int redrv_reg; + int ret; + + /* Calculate the register to write */ + redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); + + ret = xgbe_phy_redrv_write(pdata, redrv_reg, mode); + + return ret; +} + +static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_phy_redrv_mode mode; + int ret; + + if (!phy_data->redrv) + return; + + mode = XGBE_PHY_REDRV_MODE_CX; + if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && + (phy_data->sfp_base != XGBE_SFP_BASE_1000_CX) && + (phy_data->sfp_base != XGBE_SFP_BASE_10000_CR)) + mode = XGBE_PHY_REDRV_MODE_SR; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return; + + if (phy_data->redrv_if) + xgbe_phy_set_redrv_mode_i2c(pdata, mode); + else + xgbe_phy_set_redrv_mode_mdio(pdata, mode); + + xgbe_phy_put_comm_ownership(pdata); +} + +static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata) +{ + if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + return; + + /* Log if a previous command did not complete */ + netif_dbg(pdata, link, pdata->netdev, + "firmware mailbox not ready for command\n"); +} + +static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) +{ + unsigned int wait; + + /* Wait for command to complete */ + wait = XGBE_RATECHANGE_COUNT; + while (wait--) { + if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + return; + + usleep_range(1000, 2000); + } + + netif_dbg(pdata, link, pdata->netdev, + "firmware mailbox command did not complete\n"); +} + +static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) +{ + unsigned int s0; + + xgbe_phy_start_ratechange(pdata); + + /* Receiver Reset Cycle */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n"); +} + +static void xgbe_phy_power_off(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + xgbe_phy_start_ratechange(pdata); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_UNKNOWN; + + netif_dbg(pdata, link, pdata->netdev, "phy powered off\n"); +} + +static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 10G/SFI */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3); + if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) { + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + } else { + if (phy_data->sfp_cable_len <= 1) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); + else if (phy_data->sfp_cable_len <= 3) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); + else if (phy_data->sfp_cable_len <= 5) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + else + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + } + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SFI; + + netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n"); +} + +static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/X */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_X; + + netif_dbg(pdata, link, pdata->netdev, "1GbE X mode set\n"); +} + +static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/SGMII */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SGMII_1000; + + netif_dbg(pdata, link, pdata->netdev, "1GbE SGMII mode set\n"); +} + +static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/SGMII */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SGMII_100; + + netif_dbg(pdata, link, pdata->netdev, "100MbE SGMII mode set\n"); +} + +static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 10G/KR */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KR; + + netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); +} + +static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 2.5G/KX */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KX_2500; + + netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); +} + +static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/KX */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KX_1000; + + netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); +} + +static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + return phy_data->cur_mode; +} + +static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* No switching if not 10GBase-T */ + if (phy_data->port_mode != XGBE_PORT_MODE_10GBASE_T) + return xgbe_phy_cur_mode(pdata); + + switch (xgbe_phy_cur_mode(pdata)) { + case XGBE_MODE_SGMII_100: + case XGBE_MODE_SGMII_1000: + return XGBE_MODE_KR; + case XGBE_MODE_KR: + default: + return XGBE_MODE_SGMII_1000; + } +} + +static enum xgbe_mode xgbe_phy_switch_bp_2500_mode(struct xgbe_prv_data *pdata) +{ + return XGBE_MODE_KX_2500; +} + +static enum xgbe_mode xgbe_phy_switch_bp_mode(struct xgbe_prv_data *pdata) +{ + /* If we are in KR switch to KX, and vice-versa */ + switch (xgbe_phy_cur_mode(pdata)) { + case XGBE_MODE_KX_1000: + return XGBE_MODE_KR; + case XGBE_MODE_KR: + default: + return XGBE_MODE_KX_1000; + } +} + +static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_switch_bp_mode(pdata); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_switch_bp_2500_mode(pdata); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_switch_baset_mode(pdata); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + case XGBE_PORT_MODE_SFP: + /* No switching, so just return current mode */ + return xgbe_phy_cur_mode(pdata); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_basex_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_1000: + return XGBE_MODE_X; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return XGBE_MODE_SGMII_100; + case SPEED_1000: + return XGBE_MODE_SGMII_1000; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return XGBE_MODE_SGMII_100; + case SPEED_1000: + if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) + return XGBE_MODE_SGMII_1000; + else + return XGBE_MODE_X; + case SPEED_10000: + case SPEED_UNKNOWN: + return XGBE_MODE_SFI; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_bp_2500_mode(int speed) +{ + switch (speed) { + case SPEED_2500: + return XGBE_MODE_KX_2500; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_bp_mode(int speed) +{ + switch (speed) { + case SPEED_1000: + return XGBE_MODE_KX_1000; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata, + int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_get_bp_mode(speed); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_get_bp_2500_mode(speed); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_get_baset_mode(phy_data, speed); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_get_basex_mode(phy_data, speed); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_get_sfp_mode(phy_data, speed); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_phy_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_phy_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_phy_kr_mode(pdata); + break; + case XGBE_MODE_SGMII_100: + xgbe_phy_sgmii_100_mode(pdata); + break; + case XGBE_MODE_SGMII_1000: + xgbe_phy_sgmii_1000_mode(pdata); + break; + case XGBE_MODE_X: + xgbe_phy_x_mode(pdata); + break; + case XGBE_MODE_SFI: + xgbe_phy_sfi_mode(pdata); + break; + default: + break; + } +} + +static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode, u32 advert) +{ + if (pdata->phy.autoneg == AUTONEG_ENABLE) { + if (pdata->phy.advertising & advert) + return true; + } else { + enum xgbe_mode cur_mode; + + cur_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed); + if (cur_mode == mode) + return true; + } + + return false; +} + +static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_X: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_SGMII_100: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_100baseT_Full); + case XGBE_MODE_SGMII_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (mode) { + case XGBE_MODE_X: + if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_SGMII_100: + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_100baseT_Full); + case XGBE_MODE_SGMII_1000: + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_SFI: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_2500: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_2500baseX_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseKX_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseKR_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_use_bp_mode(pdata, mode); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_use_bp_2500_mode(pdata, mode); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_use_baset_mode(pdata, mode); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_use_basex_mode(pdata, mode); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_use_sfp_mode(pdata, mode); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_1000: + return (phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X); + case SPEED_10000: + return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + case SPEED_1000: + return true; + case SPEED_10000: + return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); + case SPEED_1000: + return ((phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000) || + (phy_data->sfp_speed == XGBE_SFP_SPEED_1000)); + case SPEED_10000: + return (phy_data->sfp_speed == XGBE_SFP_SPEED_10000); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_bp_2500_mode(int speed) +{ + switch (speed) { + case SPEED_2500: + return true; + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_bp_mode(int speed) +{ + switch (speed) { + case SPEED_1000: + case SPEED_10000: + return true; + default: + return false; + } +} + +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_valid_speed_bp_mode(speed); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_valid_speed_bp_2500_mode(speed); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_valid_speed_baset_mode(phy_data, speed); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_valid_speed_basex_mode(phy_data, speed); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_valid_speed_sfp_mode(phy_data, speed); + default: + return false; + } +} + +static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ret, reg; + + *an_restart = 0; + + if (phy_data->port_mode == XGBE_PORT_MODE_SFP) { + /* Check SFP signals */ + xgbe_phy_sfp_detect(pdata); + + if (phy_data->sfp_changed) { + *an_restart = 1; + return 0; + } + + if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los) + return 0; + } + + if (phy_data->phydev) { + /* Check external PHY */ + ret = phy_read_status(phy_data->phydev); + if (ret < 0) + return 0; + + if ((pdata->phy.autoneg == AUTONEG_ENABLE) && + !phy_aneg_done(phy_data->phydev)) + return 0; + + if (!phy_data->phydev->link) + return 0; + } + + /* Link status is latched low, so read once to clear + * and then read again to get current state + */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg & MDIO_STAT1_LSTATUS) + return 1; + + /* No link, attempt a receiver reset cycle */ + if (phy_data->rrc_count++) { + phy_data->rrc_count = 0; + xgbe_phy_rrc(pdata); + } + + return 0; +} + +static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + reg = XP_IOREAD(pdata, XP_PROP_3); + + phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 + + XP_GET_BITS(reg, XP_PROP_3, GPIO_ADDR); + + phy_data->sfp_gpio_mask = XP_GET_BITS(reg, XP_PROP_3, GPIO_MASK); + + phy_data->sfp_gpio_rx_los = XP_GET_BITS(reg, XP_PROP_3, + GPIO_RX_LOS); + phy_data->sfp_gpio_tx_fault = XP_GET_BITS(reg, XP_PROP_3, + GPIO_TX_FAULT); + phy_data->sfp_gpio_mod_absent = XP_GET_BITS(reg, XP_PROP_3, + GPIO_MOD_ABS); + phy_data->sfp_gpio_rate_select = XP_GET_BITS(reg, XP_PROP_3, + GPIO_RATE_SELECT); + + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "SFP: gpio_address=%#x\n", + phy_data->sfp_gpio_address); + dev_dbg(pdata->dev, "SFP: gpio_mask=%#x\n", + phy_data->sfp_gpio_mask); + dev_dbg(pdata->dev, "SFP: gpio_rx_los=%u\n", + phy_data->sfp_gpio_rx_los); + dev_dbg(pdata->dev, "SFP: gpio_tx_fault=%u\n", + phy_data->sfp_gpio_tx_fault); + dev_dbg(pdata->dev, "SFP: gpio_mod_absent=%u\n", + phy_data->sfp_gpio_mod_absent); + dev_dbg(pdata->dev, "SFP: gpio_rate_select=%u\n", + phy_data->sfp_gpio_rate_select); + } +} + +static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg, mux_addr_hi, mux_addr_lo; + + reg = XP_IOREAD(pdata, XP_PROP_4); + + mux_addr_hi = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_HI); + mux_addr_lo = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_LO); + if (mux_addr_lo == XGBE_SFP_DIRECT) + return; + + phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545; + phy_data->sfp_mux_address = (mux_addr_hi << 2) + mux_addr_lo; + phy_data->sfp_mux_channel = XP_GET_BITS(reg, XP_PROP_4, MUX_CHAN); + + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "SFP: mux_address=%#x\n", + phy_data->sfp_mux_address); + dev_dbg(pdata->dev, "SFP: mux_channel=%u\n", + phy_data->sfp_mux_channel); + } +} + +static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata) +{ + xgbe_phy_sfp_comm_setup(pdata); + xgbe_phy_sfp_gpio_setup(pdata); +} + +static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ret; + + ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio); + if (ret) + return ret; + + ret = pdata->hw_if.clr_gpio(pdata, phy_data->mdio_reset_gpio); + + return ret; +} + +static int xgbe_phy_i2c_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u8 gpio_reg, gpio_ports[2], gpio_data[3]; + int ret; + + /* Read the output port registers */ + gpio_reg = 2; + ret = xgbe_phy_i2c_read(pdata, phy_data->mdio_reset_addr, + &gpio_reg, sizeof(gpio_reg), + gpio_ports, sizeof(gpio_ports)); + if (ret) + return ret; + + /* Prepare to write the GPIO data */ + gpio_data[0] = 2; + gpio_data[1] = gpio_ports[0]; + gpio_data[2] = gpio_ports[1]; + + /* Set the GPIO pin */ + if (phy_data->mdio_reset_gpio < 8) + gpio_data[1] |= (1 << (phy_data->mdio_reset_gpio % 8)); + else + gpio_data[2] |= (1 << (phy_data->mdio_reset_gpio % 8)); + + /* Write the output port registers */ + ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, + gpio_data, sizeof(gpio_data)); + if (ret) + return ret; + + /* Clear the GPIO pin */ + if (phy_data->mdio_reset_gpio < 8) + gpio_data[1] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); + else + gpio_data[2] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); + + /* Write the output port registers */ + ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, + gpio_data, sizeof(gpio_data)); + + return ret; +} + +static int xgbe_phy_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) + return 0; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) + ret = xgbe_phy_i2c_mdio_reset(pdata); + else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) + ret = xgbe_phy_int_mdio_reset(pdata); + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data) +{ + if (!phy_data->redrv) + return false; + + if (phy_data->redrv_if >= XGBE_PHY_REDRV_IF_MAX) + return true; + + switch (phy_data->redrv_model) { + case XGBE_PHY_REDRV_MODEL_4223: + if (phy_data->redrv_lane > 3) + return true; + break; + case XGBE_PHY_REDRV_MODEL_4227: + if (phy_data->redrv_lane > 1) + return true; + break; + default: + return true; + } + + return false; +} + +static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) + return 0; + + reg = XP_IOREAD(pdata, XP_PROP_3); + phy_data->mdio_reset = XP_GET_BITS(reg, XP_PROP_3, MDIO_RESET); + switch (phy_data->mdio_reset) { + case XGBE_MDIO_RESET_NONE: + case XGBE_MDIO_RESET_I2C_GPIO: + case XGBE_MDIO_RESET_INT_GPIO: + break; + default: + dev_err(pdata->dev, "unsupported MDIO reset (%#x)\n", + phy_data->mdio_reset); + return -EINVAL; + } + + if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) { + phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 + + XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_I2C_ADDR); + phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_I2C_GPIO); + } else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) { + phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_INT_GPIO); + } + + return 0; +} + +static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) + return false; + break; + case XGBE_PORT_MODE_1000BASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)) + return false; + break; + case XGBE_PORT_MODE_1000BASE_X: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + return false; + break; + case XGBE_PORT_MODE_NBASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500)) + return false; + break; + case XGBE_PORT_MODE_10GBASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + case XGBE_PORT_MODE_10GBASE_R: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + return false; + break; + case XGBE_PORT_MODE_SFP: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + default: + break; + } + + return true; +} + +static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + case XGBE_PORT_MODE_BACKPLANE_2500: + if (phy_data->conn_type == XGBE_CONN_TYPE_BACKPLANE) + return false; + break; + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + case XGBE_PORT_MODE_10GBASE_R: + if (phy_data->conn_type == XGBE_CONN_TYPE_MDIO) + return false; + break; + case XGBE_PORT_MODE_SFP: + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + return false; + break; + default: + break; + } + + return true; +} + +static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XP_IOREAD(pdata, XP_PROP_0); + if (!XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS)) + return false; + if (!XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE)) + return false; + + return true; +} + +static void xgbe_phy_stop(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* If we have an external PHY, free it */ + xgbe_phy_free_phy_device(pdata); + + /* Reset SFP data */ + xgbe_phy_sfp_reset(phy_data); + xgbe_phy_sfp_mod_absent(pdata); + + /* Power off the PHY */ + xgbe_phy_power_off(pdata); + + /* Stop the I2C controller */ + pdata->i2c_if.i2c_stop(pdata); +} + +static int xgbe_phy_start(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + /* Start the I2C controller */ + ret = pdata->i2c_if.i2c_start(pdata); + if (ret) + return ret; + + /* Start in highest supported mode */ + xgbe_phy_set_mode(pdata, phy_data->start_mode); + + /* After starting the I2C controller, we can check for an SFP */ + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_SFP: + xgbe_phy_sfp_detect(pdata); + break; + default: + break; + } + + /* If we have an external PHY, start it */ + ret = xgbe_phy_find_phy_device(pdata); + if (ret) + goto err_i2c; + + return 0; + +err_i2c: + pdata->i2c_if.i2c_stop(pdata); + + return ret; +} + +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode cur_mode; + int ret; + + /* Reset by power cycling the PHY */ + cur_mode = phy_data->cur_mode; + xgbe_phy_power_off(pdata); + xgbe_phy_set_mode(pdata, cur_mode); + + if (!phy_data->phydev) + return 0; + + /* Reset the external PHY */ + ret = xgbe_phy_mdio_reset(pdata); + if (ret) + return ret; + + return phy_init_hw(phy_data->phydev); +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* Unregister for driving external PHYs */ + mdiobus_unregister(phy_data->mii); +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data; + struct mii_bus *mii; + unsigned int reg; + int ret; + + /* Check if enabled */ + if (!xgbe_phy_port_enabled(pdata)) { + dev_info(pdata->dev, "device is not enabled\n"); + return -ENODEV; + } + + /* Initialize the I2C controller */ + ret = pdata->i2c_if.i2c_init(pdata); + if (ret) + return ret; + + phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + pdata->phy_data = phy_data; + + reg = XP_IOREAD(pdata, XP_PROP_0); + phy_data->port_mode = XP_GET_BITS(reg, XP_PROP_0, PORT_MODE); + phy_data->port_id = XP_GET_BITS(reg, XP_PROP_0, PORT_ID); + phy_data->port_speeds = XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS); + phy_data->conn_type = XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE); + phy_data->mdio_addr = XP_GET_BITS(reg, XP_PROP_0, MDIO_ADDR); + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "port mode=%u\n", phy_data->port_mode); + dev_dbg(pdata->dev, "port id=%u\n", phy_data->port_id); + dev_dbg(pdata->dev, "port speeds=%#x\n", phy_data->port_speeds); + dev_dbg(pdata->dev, "conn type=%u\n", phy_data->conn_type); + dev_dbg(pdata->dev, "mdio addr=%u\n", phy_data->mdio_addr); + } + + reg = XP_IOREAD(pdata, XP_PROP_4); + phy_data->redrv = XP_GET_BITS(reg, XP_PROP_4, REDRV_PRESENT); + phy_data->redrv_if = XP_GET_BITS(reg, XP_PROP_4, REDRV_IF); + phy_data->redrv_addr = XP_GET_BITS(reg, XP_PROP_4, REDRV_ADDR); + phy_data->redrv_lane = XP_GET_BITS(reg, XP_PROP_4, REDRV_LANE); + phy_data->redrv_model = XP_GET_BITS(reg, XP_PROP_4, REDRV_MODEL); + if (phy_data->redrv && netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "redrv present\n"); + dev_dbg(pdata->dev, "redrv i/f=%u\n", phy_data->redrv_if); + dev_dbg(pdata->dev, "redrv addr=%#x\n", phy_data->redrv_addr); + dev_dbg(pdata->dev, "redrv lane=%u\n", phy_data->redrv_lane); + dev_dbg(pdata->dev, "redrv model=%u\n", phy_data->redrv_model); + } + + /* Validate the connection requested */ + if (xgbe_phy_conn_type_mismatch(pdata)) { + dev_err(pdata->dev, "phy mode/connection mismatch (%#x/%#x)\n", + phy_data->port_mode, phy_data->conn_type); + } + + /* Validate the mode requested */ + if (xgbe_phy_port_mode_mismatch(pdata)) { + dev_err(pdata->dev, "phy mode/speed mismatch (%#x/%#x)\n", + phy_data->port_mode, phy_data->port_speeds); + return -EINVAL; + } + + /* Check for and validate MDIO reset support */ + ret = xgbe_phy_mdio_reset_setup(pdata); + if (ret) + return ret; + + /* Validate the re-driver information */ + if (xgbe_phy_redrv_error(phy_data)) { + dev_err(pdata->dev, "phy re-driver settings error\n"); + return -EINVAL; + } + pdata->kr_redrv = phy_data->redrv; + + /* Indicate current mode is unknown */ + phy_data->cur_mode = XGBE_MODE_UNKNOWN; + + /* Initialize supported features */ + pdata->phy.supported = 0; + + switch (phy_data->port_mode) { + /* Backplane support */ + case XGBE_PORT_MODE_BACKPLANE: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseKX_Full; + phy_data->start_mode = XGBE_MODE_KX_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseKR_Full; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= + SUPPORTED_10000baseR_FEC; + phy_data->start_mode = XGBE_MODE_KR; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + phy_data->start_mode = XGBE_MODE_KX_2500; + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* MDIO 1GBase-T support */ + case XGBE_PORT_MODE_1000BASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + break; + + /* MDIO Base-X support */ + case XGBE_PORT_MODE_1000BASE_X: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_FIBRE; + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_X; + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + break; + + /* MDIO NBase-T support */ + case XGBE_PORT_MODE_NBASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) { + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + phy_data->start_mode = XGBE_MODE_KX_2500; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL45; + break; + + /* 10GBase-T support */ + case XGBE_PORT_MODE_10GBASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + phy_data->start_mode = XGBE_MODE_KR; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* 10GBase-R support */ + case XGBE_PORT_MODE_10GBASE_R: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + phy_data->start_mode = XGBE_MODE_SFI; + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* SFP support */ + case XGBE_PORT_MODE_SFP: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + pdata->phy.supported |= SUPPORTED_FIBRE; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + phy_data->start_mode = XGBE_MODE_SFI; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= + SUPPORTED_10000baseR_FEC; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + + xgbe_phy_sfp_setup(pdata); + break; + default: + return -EINVAL; + } + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "phy supported=%#x\n", + pdata->phy.supported); + + if ((phy_data->conn_type & XGBE_CONN_TYPE_MDIO) && + (phy_data->phydev_mode != XGBE_MDIO_MODE_NONE)) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, + phy_data->phydev_mode); + if (ret) { + dev_err(pdata->dev, + "mdio port/clause not compatible (%d/%u)\n", + phy_data->mdio_addr, phy_data->phydev_mode); + return -EINVAL; + } + } + + if (phy_data->redrv && !phy_data->redrv_if) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, + XGBE_MDIO_MODE_CL22); + if (ret) { + dev_err(pdata->dev, + "redriver mdio port not compatible (%u)\n", + phy_data->redrv_addr); + return -EINVAL; + } + } + + /* Register for driving external PHYs */ + mii = devm_mdiobus_alloc(pdata->dev); + if (!mii) { + dev_err(pdata->dev, "mdiobus_alloc failed\n"); + return -ENOMEM; + } + + mii->priv = pdata; + mii->name = "amd-xgbe-mii"; + mii->read = xgbe_phy_mii_read; + mii->write = xgbe_phy_mii_write; + mii->parent = pdata->dev; + mii->phy_mask = ~0; + snprintf(mii->id, sizeof(mii->id), "%s", dev_name(pdata->dev)); + ret = mdiobus_register(mii); + if (ret) { + dev_err(pdata->dev, "mdiobus_register failed\n"); + return ret; + } + phy_data->mii = mii; + + return 0; +} + +void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if) +{ + struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl; + + phy_impl->init = xgbe_phy_init; + phy_impl->exit = xgbe_phy_exit; + + phy_impl->reset = xgbe_phy_reset; + phy_impl->start = xgbe_phy_start; + phy_impl->stop = xgbe_phy_stop; + + phy_impl->link_status = xgbe_phy_link_status; + + phy_impl->valid_speed = xgbe_phy_valid_speed; + + phy_impl->use_mode = xgbe_phy_use_mode; + phy_impl->set_mode = xgbe_phy_set_mode; + phy_impl->get_mode = xgbe_phy_get_mode; + phy_impl->switch_mode = xgbe_phy_switch_mode; + phy_impl->cur_mode = xgbe_phy_cur_mode; + + phy_impl->an_mode = xgbe_phy_an_mode; + + phy_impl->an_config = xgbe_phy_an_config; + + phy_impl->an_advertising = xgbe_phy_an_advertising; + + phy_impl->an_outcome = xgbe_phy_an_outcome; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c new file mode 100644 index 000000000000..8c530dccb447 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -0,0 +1,642 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/clk.h> +#include <linux/property.h> +#include <linux/acpi.h> +#include <linux/mdio.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgbe_acpi_match[]; + +static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) +{ + const struct acpi_device_id *id; + + id = acpi_match_device(xgbe_acpi_match, pdata->dev); + + return id ? (struct xgbe_version_data *)id->driver_data : NULL; +} + +static int xgbe_acpi_support(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + u32 property; + int ret; + + /* Obtain the system clock setting */ + ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property); + if (ret) { + dev_err(dev, "unable to obtain %s property\n", + XGBE_ACPI_DMA_FREQ); + return ret; + } + pdata->sysclk_rate = property; + + /* Obtain the PTP clock setting */ + ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property); + if (ret) { + dev_err(dev, "unable to obtain %s property\n", + XGBE_ACPI_PTP_FREQ); + return ret; + } + pdata->ptpclk_rate = property; + + return 0; +} +#else /* CONFIG_ACPI */ +static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) +{ + return NULL; +} + +static int xgbe_acpi_support(struct xgbe_prv_data *pdata) +{ + return -EINVAL; +} +#endif /* CONFIG_ACPI */ + +#ifdef CONFIG_OF +static const struct of_device_id xgbe_of_match[]; + +static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) +{ + const struct of_device_id *id; + + id = of_match_device(xgbe_of_match, pdata->dev); + + return id ? (struct xgbe_version_data *)id->data : NULL; +} + +static int xgbe_of_support(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + + /* Obtain the system clock setting */ + pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK); + if (IS_ERR(pdata->sysclk)) { + dev_err(dev, "dma devm_clk_get failed\n"); + return PTR_ERR(pdata->sysclk); + } + pdata->sysclk_rate = clk_get_rate(pdata->sysclk); + + /* Obtain the PTP clock setting */ + pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK); + if (IS_ERR(pdata->ptpclk)) { + dev_err(dev, "ptp devm_clk_get failed\n"); + return PTR_ERR(pdata->ptpclk); + } + pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk); + + return 0; +} + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + struct device_node *phy_node; + struct platform_device *phy_pdev; + + phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0); + if (phy_node) { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_pdev = of_find_device_by_node(phy_node); + of_node_put(phy_node); + } else { + /* New style device tree: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + get_device(dev); + phy_pdev = pdata->platdev; + } + + return phy_pdev; +} +#else /* CONFIG_OF */ +static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) +{ + return NULL; +} + +static int xgbe_of_support(struct xgbe_prv_data *pdata) +{ + return -EINVAL; +} + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + return NULL; +} +#endif /* CONFIG_OF */ + +static unsigned int xgbe_resource_count(struct platform_device *pdev, + unsigned int type) +{ + unsigned int count; + int i; + + for (i = 0, count = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if (type == resource_type(res)) + count++; + } + + return count; +} + +static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct platform_device *phy_pdev; + + if (pdata->use_acpi) { + get_device(pdata->dev); + phy_pdev = pdata->platdev; + } else { + phy_pdev = xgbe_of_get_phy_pdev(pdata); + } + + return phy_pdev; +} + +static struct xgbe_version_data *xgbe_get_vdata(struct xgbe_prv_data *pdata) +{ + return pdata->use_acpi ? xgbe_acpi_vdata(pdata) + : xgbe_of_vdata(pdata); +} + +static int xgbe_platform_probe(struct platform_device *pdev) +{ + struct xgbe_prv_data *pdata; + struct device *dev = &pdev->dev; + struct platform_device *phy_pdev; + struct resource *res; + const char *phy_mode; + unsigned int phy_memnum, phy_irqnum; + unsigned int dma_irqnum, dma_irqend; + enum dev_dma_attr attr; + int ret; + + pdata = xgbe_alloc_pdata(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto err_alloc; + } + + pdata->platdev = pdev; + pdata->adev = ACPI_COMPANION(dev); + platform_set_drvdata(pdev, pdata); + + /* Check if we should use ACPI or DT */ + pdata->use_acpi = dev->of_node ? 0 : 1; + + /* Get the version data */ + pdata->vdata = xgbe_get_vdata(pdata); + + phy_pdev = xgbe_get_phy_pdev(pdata); + if (!phy_pdev) { + dev_err(dev, "unable to obtain phy device\n"); + ret = -EINVAL; + goto err_phydev; + } + pdata->phy_platdev = phy_pdev; + pdata->phy_dev = &phy_pdev->dev; + + if (pdev == phy_pdev) { + /* New style device tree or ACPI: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; + phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; + dma_irqnum = 1; + dma_irqend = phy_irqnum; + } else { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_memnum = 0; + phy_irqnum = 0; + dma_irqnum = 1; + dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ); + } + + /* Obtain the mmio areas for the device */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pdata->xgmac_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->xgmac_regs)) { + dev_err(dev, "xgmac ioremap failed\n"); + ret = PTR_ERR(pdata->xgmac_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + pdata->xpcs_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->xpcs_regs)) { + dev_err(dev, "xpcs ioremap failed\n"); + ret = PTR_ERR(pdata->xpcs_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->rxtx_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->rxtx_regs)) { + dev_err(dev, "rxtx ioremap failed\n"); + ret = PTR_ERR(pdata->rxtx_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "rxtx_regs = %p\n", pdata->rxtx_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir0_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir0_regs)) { + dev_err(dev, "sir0 ioremap failed\n"); + ret = PTR_ERR(pdata->sir0_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir0_regs = %p\n", pdata->sir0_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir1_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir1_regs)) { + dev_err(dev, "sir1 ioremap failed\n"); + ret = PTR_ERR(pdata->sir1_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); + + /* Retrieve the MAC address */ + ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, + pdata->mac_addr, + sizeof(pdata->mac_addr)); + if (ret || !is_valid_ether_addr(pdata->mac_addr)) { + dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY); + if (!ret) + ret = -EINVAL; + goto err_io; + } + + /* Retrieve the PHY mode - it must be "xgmii" */ + ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY, + &phy_mode); + if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) { + dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY); + if (!ret) + ret = -EINVAL; + goto err_io; + } + pdata->phy_mode = PHY_INTERFACE_MODE_XGMII; + + /* Check for per channel interrupt support */ + if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) { + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_EDGE; + } + + /* Obtain device settings unique to ACPI/OF */ + if (pdata->use_acpi) + ret = xgbe_acpi_support(pdata); + else + ret = xgbe_of_support(pdata); + if (ret) + goto err_io; + + /* Set the DMA coherency values */ + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + ret = -ENODEV; + goto err_io; + } + pdata->coherent = (attr == DEV_DMA_COHERENT); + if (pdata->coherent) { + pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; + pdata->arcache = XGBE_DMA_OS_ARCACHE; + pdata->awcache = XGBE_DMA_OS_AWCACHE; + } else { + pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN; + pdata->arcache = XGBE_DMA_SYS_ARCACHE; + pdata->awcache = XGBE_DMA_SYS_AWCACHE; + } + + /* Set the maximum fifo amounts */ + pdata->tx_max_fifo_size = pdata->vdata->tx_max_fifo_size; + pdata->rx_max_fifo_size = pdata->vdata->rx_max_fifo_size; + + /* Set the hardware channel and queue counts */ + xgbe_set_counts(pdata); + + /* Always have XGMAC and XPCS (auto-negotiation) interrupts */ + pdata->irq_count = 2; + + /* Get the device interrupt */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(dev, "platform_get_irq 0 failed\n"); + goto err_io; + } + pdata->dev_irq = ret; + + /* Get the per channel DMA interrupts */ + if (pdata->per_channel_irq) { + unsigned int i, max = ARRAY_SIZE(pdata->channel_irq); + + for (i = 0; (i < max) && (dma_irqnum < dma_irqend); i++) { + ret = platform_get_irq(pdata->platdev, dma_irqnum++); + if (ret < 0) { + netdev_err(pdata->netdev, + "platform_get_irq %u failed\n", + dma_irqnum - 1); + goto err_io; + } + + pdata->channel_irq[i] = ret; + } + + pdata->channel_irq_count = max; + + pdata->irq_count += max; + } + + /* Get the auto-negotiation interrupt */ + ret = platform_get_irq(phy_pdev, phy_irqnum++); + if (ret < 0) { + dev_err(dev, "platform_get_irq phy 0 failed\n"); + goto err_io; + } + pdata->an_irq = ret; + + /* Configure the netdev resource */ + ret = xgbe_config_netdev(pdata); + if (ret) + goto err_io; + + netdev_notice(pdata->netdev, "net device enabled\n"); + + return 0; + +err_io: + platform_device_put(phy_pdev); + +err_phydev: + xgbe_free_pdata(pdata); + +err_alloc: + dev_notice(dev, "net device not enabled\n"); + + return ret; +} + +static int xgbe_platform_remove(struct platform_device *pdev) +{ + struct xgbe_prv_data *pdata = platform_get_drvdata(pdev); + + xgbe_deconfig_netdev(pdata); + + platform_device_put(pdata->phy_platdev); + + xgbe_free_pdata(pdata); + + return 0; +} + +#ifdef CONFIG_PM +static int xgbe_platform_suspend(struct device *dev) +{ + struct xgbe_prv_data *pdata = dev_get_drvdata(dev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + DBGPR("-->xgbe_suspend\n"); + + if (netif_running(netdev)) + ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + + pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + DBGPR("<--xgbe_suspend\n"); + + return ret; +} + +static int xgbe_platform_resume(struct device *dev) +{ + struct xgbe_prv_data *pdata = dev_get_drvdata(dev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + DBGPR("-->xgbe_resume\n"); + + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + if (netif_running(netdev)) { + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + + /* Schedule a restart in case the link or phy state changed + * while we were powered down. + */ + schedule_work(&pdata->restart_work); + } + + DBGPR("<--xgbe_resume\n"); + + return ret; +} +#endif /* CONFIG_PM */ + +static const struct xgbe_version_data xgbe_v1 = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v1, + .xpcs_access = XGBE_XPCS_ACCESS_V1, + .tx_max_fifo_size = 81920, + .rx_max_fifo_size = 81920, + .tx_tstamp_workaround = 1, +}; + +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgbe_acpi_match[] = { + { .id = "AMDI8001", + .driver_data = (kernel_ulong_t)&xgbe_v1 }, + {}, +}; + +MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id xgbe_of_match[] = { + { .compatible = "amd,xgbe-seattle-v1a", + .data = &xgbe_v1 }, + {}, +}; + +MODULE_DEVICE_TABLE(of, xgbe_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops, + xgbe_platform_suspend, xgbe_platform_resume); + +static struct platform_driver xgbe_driver = { + .driver = { + .name = XGBE_DRV_NAME, +#ifdef CONFIG_ACPI + .acpi_match_table = xgbe_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = xgbe_of_match, +#endif + .pm = &xgbe_platform_pm_ops, + }, + .probe = xgbe_platform_probe, + .remove = xgbe_platform_remove, +}; + +int xgbe_platform_init(void) +{ + return platform_driver_register(&xgbe_driver); +} + +void xgbe_platform_exit(void) +{ + platform_driver_unregister(&xgbe_driver); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 5dd17dcea2f8..f52a9bd05bac 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -127,9 +127,10 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <net/dcbnl.h> +#include <linux/completion.h> #define XGBE_DRV_NAME "amd-xgbe" -#define XGBE_DRV_VERSION "1.0.2" +#define XGBE_DRV_VERSION "1.0.3" #define XGBE_DRV_DESC "AMD 10 Gigabit Ethernet Driver" /* Descriptor related defines */ @@ -158,7 +159,8 @@ #define XGBE_MAX_DMA_CHANNELS 16 #define XGBE_MAX_QUEUES 16 -#define XGBE_DMA_STOP_TIMEOUT 5 +#define XGBE_PRIORITY_QUEUES 8 +#define XGBE_DMA_STOP_TIMEOUT 1 /* DMA cache settings - Outer sharable, write-back, write-allocate */ #define XGBE_DMA_OS_AXDOMAIN 0x2 @@ -170,6 +172,10 @@ #define XGBE_DMA_SYS_ARCACHE 0x0 #define XGBE_DMA_SYS_AWCACHE 0x0 +/* DMA channel interrupt modes */ +#define XGBE_IRQ_MODE_EDGE 0 +#define XGBE_IRQ_MODE_LEVEL 1 + #define XGBE_DMA_INTERRUPT_MASK 0x31c7 #define XGMAC_MIN_PACKET 60 @@ -177,18 +183,19 @@ #define XGMAC_MAX_STD_PACKET 1518 #define XGMAC_JUMBO_PACKET_MTU 9000 #define XGMAC_MAX_JUMBO_PACKET 9018 +#define XGMAC_ETH_PREAMBLE (12 + 8) /* Inter-frame gap + preamble */ + +#define XGMAC_PFC_DATA_LEN 46 +#define XGMAC_PFC_DELAYS 14000 + +#define XGMAC_PRIO_QUEUES(_cnt) \ + min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt)) /* Common property names */ #define XGBE_MAC_ADDR_PROPERTY "mac-address" #define XGBE_PHY_MODE_PROPERTY "phy-mode" #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt" #define XGBE_SPEEDSET_PROPERTY "amd,speed-set" -#define XGBE_BLWC_PROPERTY "amd,serdes-blwc" -#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" -#define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" -#define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" -#define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" -#define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" /* Device-tree clock names */ #define XGBE_DMA_CLOCK "dma_clk" @@ -198,6 +205,20 @@ #define XGBE_ACPI_DMA_FREQ "amd,dma-freq" #define XGBE_ACPI_PTP_FREQ "amd,ptp-freq" +/* PCI BAR mapping */ +#define XGBE_XGMAC_BAR 0 +#define XGBE_XPCS_BAR 1 +#define XGBE_MAC_PROP_OFFSET 0x1d000 +#define XGBE_I2C_CTRL_OFFSET 0x1e000 + +/* PCI MSIx support */ +#define XGBE_MSIX_BASE_COUNT 4 +#define XGBE_MSIX_MIN_COUNT (XGBE_MSIX_BASE_COUNT + 1) + +/* PCI clock frequencies */ +#define XGBE_V2_DMA_CLOCK_FREQ 500000000 /* 500 MHz */ +#define XGBE_V2_PTP_CLOCK_FREQ 125000000 /* 125 MHz */ + /* Timestamp support - values based on 50MHz PTP clock * 50MHz => 20 nsec */ @@ -208,7 +229,12 @@ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 -#define XGBE_FIFO_MAX 81920 +#define XGMAC_FIFO_MIN_ALLOC 2048 +#define XGMAC_FIFO_UNIT 256 +#define XGMAC_FIFO_ALIGN(_x) \ + (((_x) + XGMAC_FIFO_UNIT - 1) & ~(XGMAC_FIFO_UNIT - 1)) +#define XGMAC_FIFO_FC_OFF 2048 +#define XGMAC_FIFO_FC_MIN 4096 #define XGBE_TC_MIN_QUANTUM 10 @@ -233,6 +259,14 @@ /* Flow control queue count */ #define XGMAC_MAX_FLOW_CONTROL_QUEUES 8 +/* Flow control threshold units */ +#define XGMAC_FLOW_CONTROL_UNIT 512 +#define XGMAC_FLOW_CONTROL_ALIGN(_x) \ + (((_x) + XGMAC_FLOW_CONTROL_UNIT - 1) & ~(XGMAC_FLOW_CONTROL_UNIT - 1)) +#define XGMAC_FLOW_CONTROL_VALUE(_x) \ + (((_x) < 1024) ? 0 : ((_x) / XGMAC_FLOW_CONTROL_UNIT) - 2) +#define XGMAC_FLOW_CONTROL_MAX 33280 + /* Maximum MAC address hash table size (256 bits = 8 bytes) */ #define XGBE_MAC_HASH_TABLE_SIZE 8 @@ -244,46 +278,19 @@ /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 -#define XGBE_LINK_TIMEOUT 10 - -#define XGBE_AN_INT_CMPLT 0x01 -#define XGBE_AN_INC_LINK 0x02 -#define XGBE_AN_PG_RCV 0x04 -#define XGBE_AN_INT_MASK 0x07 - -/* Rate-change complete wait/retry count */ -#define XGBE_RATECHANGE_COUNT 500 - -/* Default SerDes settings */ -#define XGBE_SPEED_10000_BLWC 0 -#define XGBE_SPEED_10000_CDR 0x7 -#define XGBE_SPEED_10000_PLL 0x1 -#define XGBE_SPEED_10000_PQ 0x12 -#define XGBE_SPEED_10000_RATE 0x0 -#define XGBE_SPEED_10000_TXAMP 0xa -#define XGBE_SPEED_10000_WORD 0x7 -#define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 -#define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f - -#define XGBE_SPEED_2500_BLWC 1 -#define XGBE_SPEED_2500_CDR 0x2 -#define XGBE_SPEED_2500_PLL 0x0 -#define XGBE_SPEED_2500_PQ 0xa -#define XGBE_SPEED_2500_RATE 0x1 -#define XGBE_SPEED_2500_TXAMP 0xf -#define XGBE_SPEED_2500_WORD 0x1 -#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 -#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 - -#define XGBE_SPEED_1000_BLWC 1 -#define XGBE_SPEED_1000_CDR 0x2 -#define XGBE_SPEED_1000_PLL 0x0 -#define XGBE_SPEED_1000_PQ 0xa -#define XGBE_SPEED_1000_RATE 0x3 -#define XGBE_SPEED_1000_TXAMP 0xf -#define XGBE_SPEED_1000_WORD 0x1 -#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 -#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 +#define XGBE_LINK_TIMEOUT 5 + +#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) +#define XGBE_SGMII_AN_LINK_SPEED_100 0x04 +#define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) + +/* ECC correctable error notification window (seconds) */ +#define XGBE_ECC_LIMIT 60 + +/* MDIO port types */ +#define XGMAC_MAX_C22_PORT 3 struct xgbe_prv_data; @@ -461,6 +468,7 @@ enum xgbe_state { XGBE_DOWN, XGBE_LINK_INIT, XGBE_LINK_ERR, + XGBE_STOPPED, }; enum xgbe_int { @@ -480,6 +488,12 @@ enum xgbe_int_state { XGMAC_INT_STATE_RESTORE, }; +enum xgbe_ecc_sec { + XGBE_ECC_SEC_TX, + XGBE_ECC_SEC_RX, + XGBE_ECC_SEC_DESC, +}; + enum xgbe_speed { XGBE_SPEED_1000 = 0, XGBE_SPEED_2500, @@ -487,6 +501,19 @@ enum xgbe_speed { XGBE_SPEEDS, }; +enum xgbe_xpcs_access { + XGBE_XPCS_ACCESS_V1 = 0, + XGBE_XPCS_ACCESS_V2, +}; + +enum xgbe_an_mode { + XGBE_AN_MODE_CL73 = 0, + XGBE_AN_MODE_CL73_REDRV, + XGBE_AN_MODE_CL37, + XGBE_AN_MODE_CL37_SGMII, + XGBE_AN_MODE_NONE, +}; + enum xgbe_an { XGBE_AN_READY = 0, XGBE_AN_PAGE_RECEIVED, @@ -504,8 +531,14 @@ enum xgbe_rx { }; enum xgbe_mode { - XGBE_MODE_KR = 0, - XGBE_MODE_KX, + XGBE_MODE_KX_1000 = 0, + XGBE_MODE_KX_2500, + XGBE_MODE_KR, + XGBE_MODE_X, + XGBE_MODE_SGMII_100, + XGBE_MODE_SGMII_1000, + XGBE_MODE_SFI, + XGBE_MODE_UNKNOWN, }; enum xgbe_speedset { @@ -513,6 +546,12 @@ enum xgbe_speedset { XGBE_SPEEDSET_2500_10000, }; +enum xgbe_mdio_mode { + XGBE_MDIO_MODE_NONE = 0, + XGBE_MDIO_MODE_CL22, + XGBE_MDIO_MODE_CL45, +}; + struct xgbe_phy { u32 supported; u32 advertising; @@ -531,6 +570,43 @@ struct xgbe_phy { int rx_pause; }; +enum xgbe_i2c_cmd { + XGBE_I2C_CMD_READ = 0, + XGBE_I2C_CMD_WRITE, +}; + +struct xgbe_i2c_op { + enum xgbe_i2c_cmd cmd; + + unsigned int target; + + void *buf; + unsigned int len; +}; + +struct xgbe_i2c_op_state { + struct xgbe_i2c_op *op; + + unsigned int tx_len; + unsigned char *tx_buf; + + unsigned int rx_len; + unsigned char *rx_buf; + + unsigned int tx_abort_source; + + int ret; +}; + +struct xgbe_i2c { + unsigned int started; + unsigned int max_speed_mode; + unsigned int rx_fifo_size; + unsigned int tx_fifo_size; + + struct xgbe_i2c_op_state op_state; +}; + struct xgbe_mmc_stats { /* Tx Stats */ u64 txoctetcount_gb; @@ -601,9 +677,15 @@ struct xgbe_hw_if { int (*read_mmd_regs)(struct xgbe_prv_data *, int, int); void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int); - int (*set_gmii_speed)(struct xgbe_prv_data *); - int (*set_gmii_2500_speed)(struct xgbe_prv_data *); - int (*set_xgmii_speed)(struct xgbe_prv_data *); + int (*set_speed)(struct xgbe_prv_data *, int); + + int (*set_ext_mii_mode)(struct xgbe_prv_data *, unsigned int, + enum xgbe_mdio_mode); + int (*read_ext_mii_regs)(struct xgbe_prv_data *, int, int); + int (*write_ext_mii_regs)(struct xgbe_prv_data *, int, int, u16); + + int (*set_gpio)(struct xgbe_prv_data *, unsigned int); + int (*clr_gpio)(struct xgbe_prv_data *, unsigned int); void (*enable_tx)(struct xgbe_prv_data *); void (*disable_tx)(struct xgbe_prv_data *); @@ -682,11 +764,65 @@ struct xgbe_hw_if { int (*disable_rss)(struct xgbe_prv_data *); int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *); int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *); + + /* For ECC */ + void (*disable_ecc_ded)(struct xgbe_prv_data *); + void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec); +}; + +/* This structure represents implementation specific routines for an + * implementation of a PHY. All routines are required unless noted below. + * Optional routines: + * kr_training_pre, kr_training_post + */ +struct xgbe_phy_impl_if { + /* Perform Setup/teardown actions */ + int (*init)(struct xgbe_prv_data *); + void (*exit)(struct xgbe_prv_data *); + + /* Perform start/stop specific actions */ + int (*reset)(struct xgbe_prv_data *); + int (*start)(struct xgbe_prv_data *); + void (*stop)(struct xgbe_prv_data *); + + /* Return the link status */ + int (*link_status)(struct xgbe_prv_data *, int *); + + /* Indicate if a particular speed is valid */ + bool (*valid_speed)(struct xgbe_prv_data *, int); + + /* Check if the specified mode can/should be used */ + bool (*use_mode)(struct xgbe_prv_data *, enum xgbe_mode); + /* Switch the PHY into various modes */ + void (*set_mode)(struct xgbe_prv_data *, enum xgbe_mode); + /* Retrieve mode needed for a specific speed */ + enum xgbe_mode (*get_mode)(struct xgbe_prv_data *, int); + /* Retrieve new/next mode when trying to auto-negotiate */ + enum xgbe_mode (*switch_mode)(struct xgbe_prv_data *); + /* Retrieve current mode */ + enum xgbe_mode (*cur_mode)(struct xgbe_prv_data *); + + /* Retrieve current auto-negotiation mode */ + enum xgbe_an_mode (*an_mode)(struct xgbe_prv_data *); + + /* Configure auto-negotiation settings */ + int (*an_config)(struct xgbe_prv_data *); + + /* Set/override auto-negotiation advertisement settings */ + unsigned int (*an_advertising)(struct xgbe_prv_data *); + + /* Process results of auto-negotiation */ + enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *); + + /* Pre/Post KR training enablement support */ + void (*kr_training_pre)(struct xgbe_prv_data *); + void (*kr_training_post)(struct xgbe_prv_data *); }; struct xgbe_phy_if { - /* For initial PHY setup */ - void (*phy_init)(struct xgbe_prv_data *); + /* For PHY setup/teardown */ + int (*phy_init)(struct xgbe_prv_data *); + void (*phy_exit)(struct xgbe_prv_data *); /* For PHY support when setting device up/down */ int (*phy_reset)(struct xgbe_prv_data *); @@ -696,6 +832,30 @@ struct xgbe_phy_if { /* For PHY support while device is up */ void (*phy_status)(struct xgbe_prv_data *); int (*phy_config_aneg)(struct xgbe_prv_data *); + + /* For PHY settings validation */ + bool (*phy_valid_speed)(struct xgbe_prv_data *, int); + + /* For single interrupt support */ + irqreturn_t (*an_isr)(int, struct xgbe_prv_data *); + + /* PHY implementation specific services */ + struct xgbe_phy_impl_if phy_impl; +}; + +struct xgbe_i2c_if { + /* For initial I2C setup */ + int (*i2c_init)(struct xgbe_prv_data *); + + /* For I2C support when setting device up/down */ + int (*i2c_start)(struct xgbe_prv_data *); + void (*i2c_stop)(struct xgbe_prv_data *); + + /* For performing I2C operations */ + int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *); + + /* For single interrupt support */ + irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *); }; struct xgbe_desc_if { @@ -755,11 +915,28 @@ struct xgbe_hw_features { unsigned int aux_snap_num; /* Number of Aux snapshot inputs */ }; +struct xgbe_version_data { + void (*init_function_ptrs_phy_impl)(struct xgbe_phy_if *); + enum xgbe_xpcs_access xpcs_access; + unsigned int mmc_64bit; + unsigned int tx_max_fifo_size; + unsigned int rx_max_fifo_size; + unsigned int tx_tstamp_workaround; + unsigned int ecc_support; + unsigned int i2c_support; +}; + struct xgbe_prv_data { struct net_device *netdev; - struct platform_device *pdev; + struct pci_dev *pcidev; + struct platform_device *platdev; struct acpi_device *adev; struct device *dev; + struct platform_device *phy_platdev; + struct device *phy_dev; + + /* Version related data */ + struct xgbe_version_data *vdata; /* ACPI or DT flag */ unsigned int use_acpi; @@ -770,12 +947,17 @@ struct xgbe_prv_data { void __iomem *rxtx_regs; /* SerDes Rx/Tx CSRs */ void __iomem *sir0_regs; /* SerDes integration registers (1/2) */ void __iomem *sir1_regs; /* SerDes integration registers (2/2) */ + void __iomem *xprop_regs; /* XGBE property registers */ + void __iomem *xi2c_regs; /* XGBE I2C CSRs */ /* Overall device lock */ spinlock_t lock; /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; + unsigned int xpcs_window; + unsigned int xpcs_window_size; + unsigned int xpcs_window_mask; /* RSS addressing mutex */ struct mutex rss_mutex; @@ -783,12 +965,39 @@ struct xgbe_prv_data { /* Flags representing xgbe_state */ unsigned long dev_state; + /* ECC support */ + unsigned long tx_sec_period; + unsigned long tx_ded_period; + unsigned long rx_sec_period; + unsigned long rx_ded_period; + unsigned long desc_sec_period; + unsigned long desc_ded_period; + + unsigned int tx_sec_count; + unsigned int tx_ded_count; + unsigned int rx_sec_count; + unsigned int rx_ded_count; + unsigned int desc_ded_count; + unsigned int desc_sec_count; + + struct msix_entry *msix_entries; int dev_irq; + int ecc_irq; + int i2c_irq; + int channel_irq[XGBE_MAX_DMA_CHANNELS]; + unsigned int per_channel_irq; + unsigned int irq_shared; + unsigned int irq_count; + unsigned int channel_irq_count; + unsigned int channel_irq_mode; + + char ecc_name[IFNAMSIZ + 32]; struct xgbe_hw_if hw_if; struct xgbe_phy_if phy_if; struct xgbe_desc_if desc_if; + struct xgbe_i2c_if i2c_if; /* AXI DMA settings */ unsigned int coherent; @@ -803,12 +1012,16 @@ struct xgbe_prv_data { /* Rings for Tx/Rx on a DMA channel */ struct xgbe_channel *channel; + unsigned int tx_max_channel_count; + unsigned int rx_max_channel_count; unsigned int channel_count; unsigned int tx_ring_count; unsigned int tx_desc_count; unsigned int rx_ring_count; unsigned int rx_desc_count; + unsigned int tx_max_q_count; + unsigned int rx_max_q_count; unsigned int tx_q_count; unsigned int rx_q_count; @@ -820,11 +1033,13 @@ struct xgbe_prv_data { unsigned int tx_threshold; unsigned int tx_pbl; unsigned int tx_osp_mode; + unsigned int tx_max_fifo_size; /* Rx settings */ unsigned int rx_sf_mode; unsigned int rx_threshold; unsigned int rx_pbl; + unsigned int rx_max_fifo_size; /* Tx coalescing settings */ unsigned int tx_usecs; @@ -842,6 +1057,8 @@ struct xgbe_prv_data { unsigned int pause_autoneg; unsigned int tx_pause; unsigned int rx_pause; + unsigned int rx_rfa[XGBE_MAX_QUEUES]; + unsigned int rx_rfd[XGBE_MAX_QUEUES]; /* Receive Side Scaling settings */ u8 rss_key[XGBE_RSS_HASH_KEY_SIZE]; @@ -881,13 +1098,16 @@ struct xgbe_prv_data { struct ieee_pfc *pfc; unsigned int q2tc_map[XGBE_MAX_QUEUES]; unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS]; + unsigned int pfcq[XGBE_MAX_QUEUES]; + unsigned int pfc_rfa; u8 num_tcs; /* Hardware features of the device */ struct xgbe_hw_features hw_feat; - /* Device restart work structure */ + /* Device work structures */ struct work_struct restart_work; + struct work_struct stopdev_work; /* Keeps track of power mode */ unsigned int power_down; @@ -901,9 +1121,14 @@ struct xgbe_prv_data { int phy_speed; /* MDIO/PHY related settings */ + unsigned int phy_started; + void *phy_data; struct xgbe_phy phy; int mdio_mmd; unsigned long link_check; + struct completion mdio_complete; + + unsigned int kr_redrv; char an_name[IFNAMSIZ + 32]; struct workqueue_struct *an_workqueue; @@ -911,23 +1136,9 @@ struct xgbe_prv_data { int an_irq; struct work_struct an_irq_work; - unsigned int speed_set; - - /* SerDes UEFI configurable settings. - * Switching between modes/speeds requires new values for some - * SerDes settings. The values can be supplied as device - * properties in array format. The first array entry is for - * 1GbE, second for 2.5GbE and third for 10GbE - */ - u32 serdes_blwc[XGBE_SPEEDS]; - u32 serdes_cdr_rate[XGBE_SPEEDS]; - u32 serdes_pq_skew[XGBE_SPEEDS]; - u32 serdes_tx_amp[XGBE_SPEEDS]; - u32 serdes_dfe_tap_cfg[XGBE_SPEEDS]; - u32 serdes_dfe_tap_ena[XGBE_SPEEDS]; - /* Auto-negotiation state machine support */ unsigned int an_int; + unsigned int an_status; struct mutex an_mutex; enum xgbe_an an_result; enum xgbe_an an_state; @@ -938,6 +1149,13 @@ struct xgbe_prv_data { unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; + enum xgbe_an_mode an_mode; + + /* I2C support */ + struct xgbe_i2c i2c; + struct mutex i2c_mutex; + struct completion i2c_complete; + char i2c_name[IFNAMSIZ + 32]; unsigned int lpm_ctrl; /* CTRL1 for resume */ @@ -948,14 +1166,36 @@ struct xgbe_prv_data { unsigned int debugfs_xpcs_mmd; unsigned int debugfs_xpcs_reg; + + unsigned int debugfs_xprop_reg; + + unsigned int debugfs_xi2c_reg; #endif }; /* Function prototypes*/ +struct xgbe_prv_data *xgbe_alloc_pdata(struct device *); +void xgbe_free_pdata(struct xgbe_prv_data *); +void xgbe_set_counts(struct xgbe_prv_data *); +int xgbe_config_netdev(struct xgbe_prv_data *); +void xgbe_deconfig_netdev(struct xgbe_prv_data *); + +int xgbe_platform_init(void); +void xgbe_platform_exit(void); +#ifdef CONFIG_PCI +int xgbe_pci_init(void); +void xgbe_pci_exit(void); +#else +static inline int xgbe_pci_init(void) { return 0; } +static inline void xgbe_pci_exit(void) { } +#endif void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); +void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *); +void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); +void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *); const struct net_device_ops *xgbe_get_netdev_ops(void); const struct ethtool_ops *xgbe_get_ethtool_ops(void); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 8158d4698734..1352b5245fcc 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1253,7 +1253,6 @@ static const struct net_device_ops xgene_ndev_ops = { .ndo_start_xmit = xgene_enet_start_xmit, .ndo_tx_timeout = xgene_enet_timeout, .ndo_get_stats64 = xgene_enet_get_stats64, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = xgene_enet_set_mac_address, }; @@ -1382,9 +1381,13 @@ static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata) { struct device *dev = &pdata->pdev->dev; - if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) + pdata->sfp_gpio_en = false; + if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII || + (!device_property_present(dev, "sfp-gpios") && + !device_property_present(dev, "rxlos-gpios"))) return; + pdata->sfp_gpio_en = true; pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN); if (IS_ERR(pdata->sfp_rdy)) pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 0cda58f5a840..011965b54d1f 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -219,6 +219,7 @@ struct xgene_enet_pdata { u8 rx_delay; bool mdio_driver; struct gpio_desc *sfp_rdy; + bool sfp_gpio_en; }; struct xgene_indirect_ctl { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 6475f383ba83..d1758b072623 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -415,16 +415,31 @@ static void xgene_enet_clear(struct xgene_enet_pdata *pdata, xgene_enet_wr_ring_if(pdata, addr, data); } +static int xgene_enet_gpio_lookup(struct xgene_enet_pdata *pdata) +{ + struct device *dev = &pdata->pdev->dev; + + pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN); + if (IS_ERR(pdata->sfp_rdy)) + pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN); + + if (IS_ERR(pdata->sfp_rdy)) + return -ENODEV; + + return 0; +} + static void xgene_enet_link_state(struct work_struct *work) { struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work), struct xgene_enet_pdata, link_work); - struct gpio_desc *sfp_rdy = pdata->sfp_rdy; struct net_device *ndev = pdata->ndev; u32 link_status, poll_interval; link_status = xgene_enet_link_status(pdata); - if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy)) + if (pdata->sfp_gpio_en && link_status && + (!IS_ERR(pdata->sfp_rdy) || !xgene_enet_gpio_lookup(pdata)) && + !gpiod_get_value(pdata->sfp_rdy)) link_status = 0; if (link_status) { diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index a65d7a60f116..2b2d87089987 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -1237,7 +1237,6 @@ static const struct net_device_ops bmac_netdev_ops = { .ndo_start_xmit = bmac_output, .ndo_set_rx_mode = bmac_set_multicast, .ndo_set_mac_address = bmac_set_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index e58a7c73766e..96dd5300e0e5 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -102,7 +102,6 @@ static const struct net_device_ops mace_netdev_ops = { .ndo_start_xmit = mace_xmit_start, .ndo_set_rx_mode = mace_set_multicast, .ndo_set_mac_address = mace_set_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 89914ca17a49..857df9c45f04 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -186,7 +186,6 @@ static const struct net_device_ops mace_netdev_ops = { .ndo_tx_timeout = mace_tx_timeout, .ndo_set_rx_mode = mace_set_multicast, .ndo_set_mac_address = mace_set_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b0da9693f28a..95d8b3ea7bc3 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -633,7 +633,7 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) if (unlikely(dma_mapping_error(&ndev->dev, addr))) { stats->tx_dropped++; stats->tx_errors++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr); diff --git a/drivers/net/ethernet/atheros/alx/ethtool.c b/drivers/net/ethernet/atheros/alx/ethtool.c index 08e22df2a300..2f4eabf652e8 100644 --- a/drivers/net/ethernet/atheros/alx/ethtool.c +++ b/drivers/net/ethernet/atheros/alx/ethtool.c @@ -125,64 +125,75 @@ static u32 alx_get_supported_speeds(struct alx_hw *hw) return supported; } -static int alx_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +static int alx_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct alx_priv *alx = netdev_priv(netdev); struct alx_hw *hw = &alx->hw; + u32 supported, advertising; - ecmd->supported = SUPPORTED_Autoneg | + supported = SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_Pause | SUPPORTED_Asym_Pause; if (alx_hw_giga(hw)) - ecmd->supported |= SUPPORTED_1000baseT_Full; - ecmd->supported |= alx_get_supported_speeds(hw); + supported |= SUPPORTED_1000baseT_Full; + supported |= alx_get_supported_speeds(hw); - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; if (hw->adv_cfg & ADVERTISED_Autoneg) - ecmd->advertising |= hw->adv_cfg; + advertising |= hw->adv_cfg; - ecmd->port = PORT_TP; - ecmd->phy_address = 0; + cmd->base.port = PORT_TP; + cmd->base.phy_address = 0; if (hw->adv_cfg & ADVERTISED_Autoneg) - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - ecmd->autoneg = AUTONEG_DISABLE; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.autoneg = AUTONEG_DISABLE; if (hw->flowctrl & ALX_FC_ANEG && hw->adv_cfg & ADVERTISED_Autoneg) { if (hw->flowctrl & ALX_FC_RX) { - ecmd->advertising |= ADVERTISED_Pause; + advertising |= ADVERTISED_Pause; if (!(hw->flowctrl & ALX_FC_TX)) - ecmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; } else if (hw->flowctrl & ALX_FC_TX) { - ecmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; } } - ethtool_cmd_speed_set(ecmd, hw->link_speed); - ecmd->duplex = hw->duplex; + cmd->base.speed = hw->link_speed; + cmd->base.duplex = hw->duplex; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int alx_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +static int alx_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct alx_priv *alx = netdev_priv(netdev); struct alx_hw *hw = &alx->hw; u32 adv_cfg; + u32 advertising; ASSERT_RTNL(); - if (ecmd->autoneg == AUTONEG_ENABLE) { - if (ecmd->advertising & ~alx_get_supported_speeds(hw)) + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (advertising & ~alx_get_supported_speeds(hw)) return -EINVAL; - adv_cfg = ecmd->advertising | ADVERTISED_Autoneg; + adv_cfg = advertising | ADVERTISED_Autoneg; } else { - adv_cfg = alx_speed_to_ethadv(ethtool_cmd_speed(ecmd), - ecmd->duplex); + adv_cfg = alx_speed_to_ethadv(cmd->base.speed, + cmd->base.duplex); if (!adv_cfg || adv_cfg == ADVERTISED_1000baseT_Full) return -EINVAL; @@ -300,8 +311,6 @@ static int alx_get_sset_count(struct net_device *netdev, int sset) } const struct ethtool_ops alx_ethtool_ops = { - .get_settings = alx_get_settings, - .set_settings = alx_set_settings, .get_pauseparam = alx_get_pauseparam, .set_pauseparam = alx_set_pauseparam, .get_msglevel = alx_get_msglevel, @@ -310,4 +319,6 @@ const struct ethtool_ops alx_ethtool_ops = { .get_strings = alx_get_strings, .get_sset_count = alx_get_sset_count, .get_ethtool_stats = alx_get_ethtool_stats, + .get_link_ksettings = alx_get_link_ksettings, + .set_link_ksettings = alx_set_link_ksettings, }; diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h index 0191477ace51..e42d7e0947eb 100644 --- a/drivers/net/ethernet/atheros/alx/hw.h +++ b/drivers/net/ethernet/atheros/alx/hw.h @@ -351,7 +351,6 @@ struct alx_rrd { #define ALX_MAX_JUMBO_PKT_SIZE (9*1024) #define ALX_MAX_TSO_PKT_SIZE (7*1024) #define ALX_MAX_FRAME_SIZE ALX_MAX_JUMBO_PKT_SIZE -#define ALX_MIN_FRAME_SIZE (ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) #define ALX_MAX_RX_QUEUES 8 #define ALX_MAX_TX_QUEUES 4 diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c0f84b73574d..eccbacd96201 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -892,6 +892,9 @@ static int alx_init_sw(struct alx_priv *alx) hw->smb_timer = 400; hw->mtu = alx->dev->mtu; alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu); + /* MTU range: 34 - 9256 */ + alx->dev->min_mtu = 34; + alx->dev->max_mtu = ALX_MAX_FRAME_LEN(ALX_MAX_FRAME_SIZE); alx->tx_ringsz = 256; alx->rx_ringsz = 512; hw->imt = 200; @@ -994,13 +997,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) struct alx_priv *alx = netdev_priv(netdev); int max_frame = ALX_MAX_FRAME_LEN(mtu); - if ((max_frame < ALX_MIN_FRAME_SIZE) || - (max_frame > ALX_MAX_FRAME_SIZE)) - return -EINVAL; - - if (netdev->mtu == mtu) - return 0; - netdev->mtu = mtu; alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c index 872b7abb0196..cfe86a20c899 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c @@ -26,46 +26,52 @@ #include "atl1c.h" -static int atl1c_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1c_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct atl1c_adapter *adapter = netdev_priv(netdev); struct atl1c_hw *hw = &adapter->hw; + u32 supported, advertising; - ecmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP); if (hw->link_cap_flags & ATL1C_LINK_CAP_1000M) - ecmd->supported |= SUPPORTED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; - ecmd->advertising |= hw->autoneg_advertised; + advertising |= hw->autoneg_advertised; - ecmd->port = PORT_TP; - ecmd->phy_address = 0; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_TP; + cmd->base.phy_address = 0; if (adapter->link_speed != SPEED_0) { - ethtool_cmd_speed_set(ecmd, adapter->link_speed); + cmd->base.speed = adapter->link_speed; if (adapter->link_duplex == FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int atl1c_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1c_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct atl1c_adapter *adapter = netdev_priv(netdev); struct atl1c_hw *hw = &adapter->hw; @@ -74,12 +80,12 @@ static int atl1c_set_settings(struct net_device *netdev, while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { autoneg_advertised = ADVERTISED_Autoneg; } else { - u32 speed = ethtool_cmd_speed(ecmd); + u32 speed = cmd->base.speed; if (speed == SPEED_1000) { - if (ecmd->duplex != DUPLEX_FULL) { + if (cmd->base.duplex != DUPLEX_FULL) { if (netif_msg_link(adapter)) dev_warn(&adapter->pdev->dev, "1000M half is invalid\n"); @@ -88,12 +94,12 @@ static int atl1c_set_settings(struct net_device *netdev, } autoneg_advertised = ADVERTISED_1000baseT_Full; } else if (speed == SPEED_100) { - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) autoneg_advertised = ADVERTISED_100baseT_Full; else autoneg_advertised = ADVERTISED_100baseT_Half; } else { - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) autoneg_advertised = ADVERTISED_10baseT_Full; else autoneg_advertised = ADVERTISED_10baseT_Half; @@ -284,8 +290,6 @@ static int atl1c_nway_reset(struct net_device *netdev) } static const struct ethtool_ops atl1c_ethtool_ops = { - .get_settings = atl1c_get_settings, - .set_settings = atl1c_set_settings, .get_drvinfo = atl1c_get_drvinfo, .get_regs_len = atl1c_get_regs_len, .get_regs = atl1c_get_regs, @@ -297,6 +301,8 @@ static const struct ethtool_ops atl1c_ethtool_ops = { .get_link = ethtool_op_get_link, .get_eeprom_len = atl1c_get_eeprom_len, .get_eeprom = atl1c_get_eeprom, + .get_link_ksettings = atl1c_get_link_ksettings, + .set_link_ksettings = atl1c_set_link_ksettings, }; void atl1c_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index a3200ea6d765..773d3b7d8dd5 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -519,6 +519,26 @@ static int atl1c_set_features(struct net_device *netdev, return 0; } +static void atl1c_set_max_mtu(struct net_device *netdev) +{ + struct atl1c_adapter *adapter = netdev_priv(netdev); + struct atl1c_hw *hw = &adapter->hw; + + switch (hw->nic_type) { + /* These (GbE) devices support jumbo packets, max_mtu 6122 */ + case athr_l1c: + case athr_l1d: + case athr_l1d_2: + netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + break; + /* The 10/100 devices don't support jumbo packets, max_mtu 1500 */ + default: + netdev->max_mtu = ETH_DATA_LEN; + break; + } +} + /** * atl1c_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure @@ -529,22 +549,9 @@ static int atl1c_set_features(struct net_device *netdev, static int atl1c_change_mtu(struct net_device *netdev, int new_mtu) { struct atl1c_adapter *adapter = netdev_priv(netdev); - struct atl1c_hw *hw = &adapter->hw; - int old_mtu = netdev->mtu; - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - - /* Fast Ethernet controller doesn't support jumbo packet */ - if (((hw->nic_type == athr_l2c || - hw->nic_type == athr_l2c_b || - hw->nic_type == athr_l2c_b2) && new_mtu > ETH_DATA_LEN) || - max_frame < ETH_ZLEN + ETH_FCS_LEN || - max_frame > MAX_JUMBO_FRAME_SIZE) { - if (netif_msg_link(adapter)) - dev_warn(&adapter->pdev->dev, "invalid MTU setting\n"); - return -EINVAL; - } + /* set MTU */ - if (old_mtu != new_mtu && netif_running(netdev)) { + if (netif_running(netdev)) { while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); netdev->mtu = new_mtu; @@ -2511,6 +2518,7 @@ static int atl1c_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->netdev_ops = &atl1c_netdev_ops; netdev->watchdog_timeo = AT_TX_WATCHDOG; + netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN); atl1c_set_ethtool_ops(netdev); /* TODO: add when ready */ @@ -2613,6 +2621,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "net device private data init failed\n"); goto err_sw_init; } + /* set max MTU */ + atl1c_set_max_mtu(netdev); + atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE); /* Init GPHY as early as possible due to power saving issue */ diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c index 8e3dbd4d9f79..cb489e7e8374 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c @@ -26,73 +26,83 @@ #include "atl1e.h" -static int atl1e_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct atl1e_adapter *adapter = netdev_priv(netdev); struct atl1e_hw *hw = &adapter->hw; + u32 supported, advertising; - ecmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP); if (hw->nic_type == athr_l1e) - ecmd->supported |= SUPPORTED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->advertising |= hw->autoneg_advertised; + advertising |= ADVERTISED_Autoneg; + advertising |= hw->autoneg_advertised; - ecmd->port = PORT_TP; - ecmd->phy_address = 0; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_TP; + cmd->base.phy_address = 0; if (adapter->link_speed != SPEED_0) { - ethtool_cmd_speed_set(ecmd, adapter->link_speed); + cmd->base.speed = adapter->link_speed; if (adapter->link_duplex == FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int atl1e_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct atl1e_adapter *adapter = netdev_priv(netdev); struct atl1e_hw *hw = &adapter->hw; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { u16 adv4, adv9; - if ((ecmd->advertising&ADVERTISE_1000_FULL)) { + if (advertising & ADVERTISE_1000_FULL) { if (hw->nic_type == athr_l1e) { hw->autoneg_advertised = - ecmd->advertising & AT_ADV_MASK; + advertising & AT_ADV_MASK; } else { clear_bit(__AT_RESETTING, &adapter->flags); return -EINVAL; } - } else if (ecmd->advertising&ADVERTISE_1000_HALF) { + } else if (advertising & ADVERTISE_1000_HALF) { clear_bit(__AT_RESETTING, &adapter->flags); return -EINVAL; } else { hw->autoneg_advertised = - ecmd->advertising & AT_ADV_MASK; + advertising & AT_ADV_MASK; } - ecmd->advertising = hw->autoneg_advertised | + advertising = hw->autoneg_advertised | ADVERTISED_TP | ADVERTISED_Autoneg; adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL; @@ -367,8 +377,6 @@ static int atl1e_nway_reset(struct net_device *netdev) } static const struct ethtool_ops atl1e_ethtool_ops = { - .get_settings = atl1e_get_settings, - .set_settings = atl1e_set_settings, .get_drvinfo = atl1e_get_drvinfo, .get_regs_len = atl1e_get_regs_len, .get_regs = atl1e_get_regs, @@ -380,6 +388,8 @@ static const struct ethtool_ops atl1e_ethtool_ops = { .get_eeprom_len = atl1e_get_eeprom_len, .get_eeprom = atl1e_get_eeprom, .set_eeprom = atl1e_set_eeprom, + .get_link_ksettings = atl1e_get_link_ksettings, + .set_link_ksettings = atl1e_set_link_ksettings, }; void atl1e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 974713b19ab6..e96091b652a7 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -439,16 +439,10 @@ static int atl1e_set_features(struct net_device *netdev, static int atl1e_change_mtu(struct net_device *netdev, int new_mtu) { struct atl1e_adapter *adapter = netdev_priv(netdev); - int old_mtu = netdev->mtu; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || - (max_frame > MAX_JUMBO_FRAME_SIZE)) { - netdev_warn(adapter->netdev, "invalid MTU setting\n"); - return -EINVAL; - } /* set MTU */ - if (old_mtu != new_mtu && netif_running(netdev)) { + if (netif_running(netdev)) { while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); netdev->mtu = new_mtu; @@ -2272,6 +2266,10 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->netdev_ops = &atl1e_netdev_ops; netdev->watchdog_timeo = AT_TX_WATCHDOG; + /* MTU range: 42 - 8170 */ + netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN); + netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); atl1e_set_ethtool_ops(netdev); netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 529bca718334..7dad8e4b9d2a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2701,23 +2701,15 @@ static void atl1_reset_dev_task(struct work_struct *work) static int atl1_change_mtu(struct net_device *netdev, int new_mtu) { struct atl1_adapter *adapter = netdev_priv(netdev); - int old_mtu = netdev->mtu; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || - (max_frame > MAX_JUMBO_FRAME_SIZE)) { - if (netif_msg_link(adapter)) - dev_warn(&adapter->pdev->dev, "invalid MTU setting\n"); - return -EINVAL; - } - adapter->hw.max_frame_size = max_frame; adapter->hw.tx_jumbo_task_th = (max_frame + 7) >> 3; adapter->rx_buffer_len = (max_frame + 7) & ~7; adapter->hw.rx_jumbo_th = adapter->rx_buffer_len / 8; netdev->mtu = new_mtu; - if ((old_mtu != new_mtu) && netif_running(netdev)) { + if (netif_running(netdev)) { atl1_down(adapter); atl1_up(adapter); } @@ -3031,6 +3023,11 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* is this valid? see atl1_setup_mac_ctrl() */ netdev->features |= NETIF_F_RXCSUM; + /* MTU range: 42 - 10218 */ + netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN); + netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + /* * patch for some L1 of old version, * the final version of L1 may not need these @@ -3217,66 +3214,72 @@ static int atl1_get_sset_count(struct net_device *netdev, int sset) } } -static int atl1_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct atl1_adapter *adapter = netdev_priv(netdev); struct atl1_hw *hw = &adapter->hw; + u32 supported, advertising; - ecmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP); - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR || hw->media_type == MEDIA_TYPE_1000M_FULL) { - ecmd->advertising |= ADVERTISED_Autoneg; + advertising |= ADVERTISED_Autoneg; if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR) { - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->advertising |= + advertising |= ADVERTISED_Autoneg; + advertising |= (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | ADVERTISED_1000baseT_Full); } else - ecmd->advertising |= (ADVERTISED_1000baseT_Full); + advertising |= (ADVERTISED_1000baseT_Full); } - ecmd->port = PORT_TP; - ecmd->phy_address = 0; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_TP; + cmd->base.phy_address = 0; if (netif_carrier_ok(adapter->netdev)) { u16 link_speed, link_duplex; atl1_get_speed_and_duplex(hw, &link_speed, &link_duplex); - ethtool_cmd_speed_set(ecmd, link_speed); + cmd->base.speed = link_speed; if (link_duplex == FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR || hw->media_type == MEDIA_TYPE_1000M_FULL) - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - ecmd->autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_DISABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int atl1_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct atl1_adapter *adapter = netdev_priv(netdev); struct atl1_hw *hw = &adapter->hw; u16 phy_data; int ret_val = 0; u16 old_media_type = hw->media_type; + u32 advertising; if (netif_running(adapter->netdev)) { if (netif_msg_link(adapter)) @@ -3285,12 +3288,12 @@ static int atl1_set_settings(struct net_device *netdev, atl1_down(adapter); } - if (ecmd->autoneg == AUTONEG_ENABLE) + if (cmd->base.autoneg == AUTONEG_ENABLE) hw->media_type = MEDIA_TYPE_AUTO_SENSOR; else { - u32 speed = ethtool_cmd_speed(ecmd); + u32 speed = cmd->base.speed; if (speed == SPEED_1000) { - if (ecmd->duplex != DUPLEX_FULL) { + if (cmd->base.duplex != DUPLEX_FULL) { if (netif_msg_link(adapter)) dev_warn(&adapter->pdev->dev, "1000M half is invalid\n"); @@ -3299,12 +3302,12 @@ static int atl1_set_settings(struct net_device *netdev, } hw->media_type = MEDIA_TYPE_1000M_FULL; } else if (speed == SPEED_100) { - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) hw->media_type = MEDIA_TYPE_100M_FULL; else hw->media_type = MEDIA_TYPE_100M_HALF; } else { - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) hw->media_type = MEDIA_TYPE_10M_FULL; else hw->media_type = MEDIA_TYPE_10M_HALF; @@ -3312,7 +3315,7 @@ static int atl1_set_settings(struct net_device *netdev, } switch (hw->media_type) { case MEDIA_TYPE_AUTO_SENSOR: - ecmd->advertising = + advertising = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | @@ -3321,12 +3324,12 @@ static int atl1_set_settings(struct net_device *netdev, ADVERTISED_Autoneg | ADVERTISED_TP; break; case MEDIA_TYPE_1000M_FULL: - ecmd->advertising = + advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | ADVERTISED_TP; break; default: - ecmd->advertising = 0; + advertising = 0; break; } if (atl1_phy_setup_autoneg_adv(hw)) { @@ -3666,8 +3669,6 @@ static int atl1_nway_reset(struct net_device *netdev) } static const struct ethtool_ops atl1_ethtool_ops = { - .get_settings = atl1_get_settings, - .set_settings = atl1_set_settings, .get_drvinfo = atl1_get_drvinfo, .get_wol = atl1_get_wol, .set_wol = atl1_set_wol, @@ -3684,6 +3685,8 @@ static const struct ethtool_ops atl1_ethtool_ops = { .nway_reset = atl1_nway_reset, .get_ethtool_stats = atl1_get_ethtool_stats, .get_sset_count = atl1_get_sset_count, + .get_link_ksettings = atl1_get_link_ksettings, + .set_link_ksettings = atl1_set_link_ksettings, }; module_pci_driver(atl1_driver); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 2ff465848b65..63f2deec2a52 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -253,7 +253,7 @@ static int atl2_configure(struct atl2_adapter *adapter) /* set MTU */ ATL2_WRITE_REG(hw, REG_MTU, adapter->netdev->mtu + - ENET_HEADER_SIZE + VLAN_SIZE + ETHERNET_FCS_SIZE); + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); /* 1590 */ ATL2_WRITE_REG(hw, REG_TX_CUT_THRESH, 0x177); @@ -925,15 +925,11 @@ static int atl2_change_mtu(struct net_device *netdev, int new_mtu) struct atl2_adapter *adapter = netdev_priv(netdev); struct atl2_hw *hw = &adapter->hw; - if ((new_mtu < 40) || (new_mtu > (ETH_DATA_LEN + VLAN_SIZE))) - return -EINVAL; - /* set MTU */ - if (hw->max_frame_size != new_mtu) { - netdev->mtu = new_mtu; - ATL2_WRITE_REG(hw, REG_MTU, new_mtu + ENET_HEADER_SIZE + - VLAN_SIZE + ETHERNET_FCS_SIZE); - } + netdev->mtu = new_mtu; + hw->max_frame_size = new_mtu; + ATL2_WRITE_REG(hw, REG_MTU, new_mtu + ETH_HLEN + + VLAN_HLEN + ETH_FCS_LEN); return 0; } @@ -1398,6 +1394,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &atl2_netdev_ops; netdev->ethtool_ops = &atl2_ethtool_ops; netdev->watchdog_timeo = 5 * HZ; + netdev->min_mtu = 40; + netdev->max_mtu = ETH_DATA_LEN + VLAN_HLEN; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); netdev->mem_start = mmio_start; @@ -1739,81 +1737,87 @@ static void atl2_write_pci_cfg(struct atl2_hw *hw, u32 reg, u16 *value) pci_write_config_word(adapter->pdev, reg, *value); } -static int atl2_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl2_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct atl2_adapter *adapter = netdev_priv(netdev); struct atl2_hw *hw = &adapter->hw; + u32 supported, advertising; - ecmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP); - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->advertising |= hw->autoneg_advertised; + advertising |= ADVERTISED_Autoneg; + advertising |= hw->autoneg_advertised; - ecmd->port = PORT_TP; - ecmd->phy_address = 0; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_TP; + cmd->base.phy_address = 0; if (adapter->link_speed != SPEED_0) { - ethtool_cmd_speed_set(ecmd, adapter->link_speed); + cmd->base.speed = adapter->link_speed; if (adapter->link_duplex == FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int atl2_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl2_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct atl2_adapter *adapter = netdev_priv(netdev); struct atl2_hw *hw = &adapter->hw; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); while (test_and_set_bit(__ATL2_RESETTING, &adapter->flags)) msleep(1); - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { #define MY_ADV_MASK (ADVERTISE_10_HALF | \ ADVERTISE_10_FULL | \ ADVERTISE_100_HALF| \ ADVERTISE_100_FULL) - if ((ecmd->advertising & MY_ADV_MASK) == MY_ADV_MASK) { + if ((advertising & MY_ADV_MASK) == MY_ADV_MASK) { hw->MediaType = MEDIA_TYPE_AUTO_SENSOR; hw->autoneg_advertised = MY_ADV_MASK; - } else if ((ecmd->advertising & MY_ADV_MASK) == - ADVERTISE_100_FULL) { + } else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_FULL) { hw->MediaType = MEDIA_TYPE_100M_FULL; hw->autoneg_advertised = ADVERTISE_100_FULL; - } else if ((ecmd->advertising & MY_ADV_MASK) == - ADVERTISE_100_HALF) { + } else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_HALF) { hw->MediaType = MEDIA_TYPE_100M_HALF; hw->autoneg_advertised = ADVERTISE_100_HALF; - } else if ((ecmd->advertising & MY_ADV_MASK) == - ADVERTISE_10_FULL) { + } else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_FULL) { hw->MediaType = MEDIA_TYPE_10M_FULL; hw->autoneg_advertised = ADVERTISE_10_FULL; - } else if ((ecmd->advertising & MY_ADV_MASK) == - ADVERTISE_10_HALF) { + } else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_HALF) { hw->MediaType = MEDIA_TYPE_10M_HALF; hw->autoneg_advertised = ADVERTISE_10_HALF; } else { clear_bit(__ATL2_RESETTING, &adapter->flags); return -EINVAL; } - ecmd->advertising = hw->autoneg_advertised | + advertising = hw->autoneg_advertised | ADVERTISED_TP | ADVERTISED_Autoneg; } else { clear_bit(__ATL2_RESETTING, &adapter->flags); @@ -2082,8 +2086,6 @@ static int atl2_nway_reset(struct net_device *netdev) } static const struct ethtool_ops atl2_ethtool_ops = { - .get_settings = atl2_get_settings, - .set_settings = atl2_set_settings, .get_drvinfo = atl2_get_drvinfo, .get_regs_len = atl2_get_regs_len, .get_regs = atl2_get_regs, @@ -2096,6 +2098,8 @@ static const struct ethtool_ops atl2_ethtool_ops = { .get_eeprom_len = atl2_get_eeprom_len, .get_eeprom = atl2_get_eeprom, .set_eeprom = atl2_set_eeprom, + .get_link_ksettings = atl2_get_link_ksettings, + .set_link_ksettings = atl2_set_link_ksettings, }; #define LBYTESWAP(a) ((((a) & 0x00ff00ff) << 8) | \ diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h index 2f27d4c4c3ad..c64a6bdfa7ae 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.h +++ b/drivers/net/ethernet/atheros/atlx/atl2.h @@ -228,12 +228,9 @@ static void atl2_force_ps(struct atl2_hw *hw); #define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x000F /* Everything */ /* The size (in bytes) of a ethernet packet */ -#define ENET_HEADER_SIZE 14 #define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* with FCS */ #define MINIMUM_ETHERNET_FRAME_SIZE 64 /* with FCS */ -#define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x2000 -#define VLAN_SIZE 4 struct tx_pkt_header { unsigned pkt_size:11; diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index 00c38bf151e6..b59aa3541270 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -975,8 +975,10 @@ static int nb8800_open(struct net_device *dev) phydev = of_phy_connect(dev, priv->phy_node, nb8800_link_reconfigure, 0, priv->phy_mode); - if (!phydev) + if (!phydev) { + err = -ENODEV; goto err_free_irq; + } nb8800_pause_adv(dev); @@ -1032,7 +1034,6 @@ static const struct net_device_ops nb8800_netdev_ops = { .ndo_set_mac_address = nb8800_set_mac_address, .ndo_set_rx_mode = nb8800_set_rx_mode, .ndo_do_ioctl = nb8800_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 17aa33c5567d..1df3048a3cdb 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -59,8 +59,8 @@ #define B44_TX_TIMEOUT (5 * HZ) /* hardware minimum and maximum for a single frame's data payload */ -#define B44_MIN_MTU 60 -#define B44_MAX_MTU 1500 +#define B44_MIN_MTU ETH_ZLEN +#define B44_MAX_MTU ETH_DATA_LEN #define B44_RX_RING_SIZE 512 #define B44_DEF_RX_RING_PENDING 200 @@ -1064,9 +1064,6 @@ static int b44_change_mtu(struct net_device *dev, int new_mtu) { struct b44 *bp = netdev_priv(dev); - if (new_mtu < B44_MIN_MTU || new_mtu > B44_MAX_MTU) - return -EINVAL; - if (!netif_running(dev)) { /* We'll just catch it later when the * device is up'd. @@ -2377,6 +2374,8 @@ static int b44_init_one(struct ssb_device *sdev, dev->netdev_ops = &b44_netdev_ops; netif_napi_add(dev, &bp->napi, b44_poll, 64); dev->watchdog_timeo = B44_TX_TIMEOUT; + dev->min_mtu = B44_MIN_MTU; + dev->max_mtu = B44_MAX_MTU; dev->irq = sdev->irq; dev->ethtool_ops = &b44_ethtool_ops; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 537090952c45..5c7acef1de2e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1623,20 +1623,19 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } /* - * calculate actual hardware mtu + * adjust mtu, can't be called while device is running */ -static int compute_hw_mtu(struct bcm_enet_priv *priv, int mtu) +static int bcm_enet_change_mtu(struct net_device *dev, int new_mtu) { - int actual_mtu; + struct bcm_enet_priv *priv = netdev_priv(dev); + int actual_mtu = new_mtu; - actual_mtu = mtu; + if (netif_running(dev)) + return -EBUSY; /* add ethernet header + vlan tag size */ actual_mtu += VLAN_ETH_HLEN; - if (actual_mtu < 64 || actual_mtu > BCMENET_MAX_MTU) - return -EINVAL; - /* * setup maximum size before we get overflow mark in * descriptor, note that this will not prevent reception of @@ -1651,22 +1650,7 @@ static int compute_hw_mtu(struct bcm_enet_priv *priv, int mtu) */ priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN, priv->dma_maxburst * 4); - return 0; -} -/* - * adjust mtu, can't be called while device is running - */ -static int bcm_enet_change_mtu(struct net_device *dev, int new_mtu) -{ - int ret; - - if (netif_running(dev)) - return -EBUSY; - - ret = compute_hw_mtu(netdev_priv(dev), new_mtu); - if (ret) - return ret; dev->mtu = new_mtu; return 0; } @@ -1756,7 +1740,7 @@ static int bcm_enet_probe(struct platform_device *pdev) priv->enet_is_sw = false; priv->dma_maxburst = BCMENET_DMA_MAXBURST; - ret = compute_hw_mtu(priv, dev->mtu); + ret = bcm_enet_change_mtu(dev, dev->mtu); if (ret) goto out; @@ -1889,6 +1873,9 @@ static int bcm_enet_probe(struct platform_device *pdev) netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16); dev->ethtool_ops = &bcm_enet_ethtool_ops; + /* MTU range: 46 - 2028 */ + dev->min_mtu = ETH_ZLEN - ETH_HLEN; + dev->max_mtu = BCMENET_MAX_MTU - VLAN_ETH_HLEN; SET_NETDEV_DEV(dev, &pdev->dev); ret = register_netdev(dev); @@ -2743,7 +2730,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) priv->dma_chan_width = pd->dma_chan_width; } - ret = compute_hw_mtu(priv, dev->mtu); + ret = bcm_enet_change_mtu(dev, dev->mtu); if (ret) goto out; diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index c16ec3a51876..4a4ffc0c4c65 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -80,6 +80,24 @@ static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask, bcma_maskset32(bgmac->bcma.cmn, offset, mask, set); } +static int bcma_phy_connect(struct bgmac *bgmac) +{ + struct phy_device *phy_dev; + char bus_id[MII_BUS_ID_SIZE + 3]; + + /* Connect to the PHY */ + snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id, + bgmac->phyaddr); + phy_dev = phy_connect(bgmac->net_dev, bus_id, bgmac_adjust_link, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(phy_dev)) { + dev_err(bgmac->dev, "PHY connection failed\n"); + return PTR_ERR(phy_dev); + } + + return 0; +} + static const struct bcma_device_id bgmac_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), @@ -275,6 +293,10 @@ static int bgmac_probe(struct bcma_device *core) bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset; bgmac->get_bus_clock = bcma_bgmac_get_bus_clock; bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32; + if (bgmac->mii_bus) + bgmac->phy_connect = bcma_phy_connect; + else + bgmac->phy_connect = bgmac_phy_connect_direct; err = bgmac_enet_probe(bgmac); if (err) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index be52f270c2c1..6f736c19872f 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -14,11 +14,21 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bcma/bcma.h> +#include <linux/brcmphy.h> #include <linux/etherdevice.h> #include <linux/of_address.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include "bgmac.h" +#define NICPM_IOMUX_CTRL 0x00000008 + +#define NICPM_IOMUX_CTRL_INIT_VAL 0x3196e000 +#define NICPM_IOMUX_CTRL_SPD_SHIFT 10 +#define NICPM_IOMUX_CTRL_SPD_10M 0 +#define NICPM_IOMUX_CTRL_SPD_100M 1 +#define NICPM_IOMUX_CTRL_SPD_1000M 2 + static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset) { return readl(bgmac->plat.base + offset); @@ -86,6 +96,54 @@ static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, WARN_ON(1); } +static void bgmac_nicpm_speed_set(struct net_device *net_dev) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + u32 val; + + if (!bgmac->plat.nicpm_base) + return; + + val = NICPM_IOMUX_CTRL_INIT_VAL; + switch (bgmac->net_dev->phydev->speed) { + default: + netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n"); + case SPEED_1000: + val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + case SPEED_100: + val |= NICPM_IOMUX_CTRL_SPD_100M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + case SPEED_10: + val |= NICPM_IOMUX_CTRL_SPD_10M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + } + + writel(val, bgmac->plat.nicpm_base + NICPM_IOMUX_CTRL); + + bgmac_adjust_link(bgmac->net_dev); +} + +static int platform_phy_connect(struct bgmac *bgmac) +{ + struct phy_device *phy_dev; + + if (bgmac->plat.nicpm_base) + phy_dev = of_phy_get_and_connect(bgmac->net_dev, + bgmac->dev->of_node, + bgmac_nicpm_speed_set); + else + phy_dev = of_phy_get_and_connect(bgmac->net_dev, + bgmac->dev->of_node, + bgmac_adjust_link); + if (!phy_dev) { + dev_err(bgmac->dev, "PHY connection failed\n"); + return -ENODEV; + } + + return 0; +} + static int bgmac_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -102,7 +160,6 @@ static int bgmac_probe(struct platform_device *pdev) /* Set the features of the 4707 family */ bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; - bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4; bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP; bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP; @@ -142,6 +199,14 @@ static int bgmac_probe(struct platform_device *pdev) if (IS_ERR(bgmac->plat.idm_base)) return PTR_ERR(bgmac->plat.idm_base); + regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base"); + if (regs) { + bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev, + regs); + if (IS_ERR(bgmac->plat.nicpm_base)) + return PTR_ERR(bgmac->plat.nicpm_base); + } + bgmac->read = platform_bgmac_read; bgmac->write = platform_bgmac_write; bgmac->idm_read = platform_bgmac_idm_read; @@ -151,6 +216,12 @@ static int bgmac_probe(struct platform_device *pdev) bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset; bgmac->get_bus_clock = platform_bgmac_get_bus_clock; bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32; + if (of_parse_phandle(np, "phy-handle", 0)) { + bgmac->phy_connect = platform_phy_connect; + } else { + bgmac->phy_connect = bgmac_phy_connect_direct; + bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; + } return bgmac_enet_probe(bgmac); } @@ -167,6 +238,7 @@ static int bgmac_remove(struct platform_device *pdev) static const struct of_device_id bgmac_of_enet_match[] = { {.compatible = "brcm,amac",}, {.compatible = "brcm,nsp-amac",}, + {.compatible = "brcm,ns2-amac",}, {}, }; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 49f4cafe5438..0e066dc6b8cc 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1085,6 +1085,9 @@ static void bgmac_enable(struct bgmac *bgmac) /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */ static void bgmac_chip_init(struct bgmac *bgmac) { + /* Clear any erroneously pending interrupts */ + bgmac_write(bgmac, BGMAC_INT_STATUS, ~0); + /* 1 interrupt per received frame */ bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT); @@ -1391,7 +1394,7 @@ static const struct ethtool_ops bgmac_ethtool_ops = { * MII **************************************************/ -static void bgmac_adjust_link(struct net_device *net_dev) +void bgmac_adjust_link(struct net_device *net_dev) { struct bgmac *bgmac = netdev_priv(net_dev); struct phy_device *phy_dev = net_dev->phydev; @@ -1414,8 +1417,9 @@ static void bgmac_adjust_link(struct net_device *net_dev) phy_print_status(phy_dev); } } +EXPORT_SYMBOL_GPL(bgmac_adjust_link); -static int bgmac_phy_connect_direct(struct bgmac *bgmac) +int bgmac_phy_connect_direct(struct bgmac *bgmac) { struct fixed_phy_status fphy_status = { .link = 1, @@ -1440,24 +1444,7 @@ static int bgmac_phy_connect_direct(struct bgmac *bgmac) return err; } - -static int bgmac_phy_connect(struct bgmac *bgmac) -{ - struct phy_device *phy_dev; - char bus_id[MII_BUS_ID_SIZE + 3]; - - /* Connect to the PHY */ - snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id, - bgmac->phyaddr); - phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link, - PHY_INTERFACE_MODE_MII); - if (IS_ERR(phy_dev)) { - dev_err(bgmac->dev, "PHY connection failed\n"); - return PTR_ERR(phy_dev); - } - - return 0; -} +EXPORT_SYMBOL_GPL(bgmac_phy_connect_direct); int bgmac_enet_probe(struct bgmac *info) { @@ -1510,10 +1497,7 @@ int bgmac_enet_probe(struct bgmac *info) netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT); - if (!bgmac->mii_bus) - err = bgmac_phy_connect_direct(bgmac); - else - err = bgmac_phy_connect(bgmac); + err = bgmac_phy_connect(bgmac); if (err) { dev_err(bgmac->dev, "Cannot connect to phy\n"); goto err_dma_free; diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 80836b4c9f38..71f493f2451f 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -463,6 +463,7 @@ struct bgmac { struct { void *base; void *idm_base; + void *nicpm_base; } plat; struct { struct bcma_device *core; @@ -513,10 +514,13 @@ struct bgmac { u32 (*get_bus_clock)(struct bgmac *bgmac); void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask, u32 set); + int (*phy_connect)(struct bgmac *bgmac); }; int bgmac_enet_probe(struct bgmac *info); void bgmac_enet_remove(struct bgmac *bgmac); +void bgmac_adjust_link(struct net_device *net_dev); +int bgmac_phy_connect_direct(struct bgmac *bgmac); struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr); void bcma_mdio_mii_unregister(struct mii_bus *mii_bus); @@ -583,4 +587,9 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set) { bgmac_maskset(bgmac, offset, ~0, set); } + +static inline int bgmac_phy_connect(struct bgmac *bgmac) +{ + return bgmac->phy_connect(bgmac); +} #endif /* _BGMAC_H */ diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 1f7034d739b0..7baf30082ab3 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2304,7 +2304,7 @@ bnx2_init_5706s_phy(struct bnx2 *bp, int reset_phy) if (BNX2_CHIP(bp) == BNX2_CHIP_5706) BNX2_WR(bp, BNX2_MISC_GP_HW_CTL0, 0x300); - if (bp->dev->mtu > 1500) { + if (bp->dev->mtu > ETH_DATA_LEN) { u32 val; /* Set extended packet length bit */ @@ -2358,7 +2358,7 @@ bnx2_init_copper_phy(struct bnx2 *bp, int reset_phy) bnx2_write_phy(bp, MII_BNX2_DSP_RW_PORT, val); } - if (bp->dev->mtu > 1500) { + if (bp->dev->mtu > ETH_DATA_LEN) { /* Set extended packet length bit */ bnx2_write_phy(bp, 0x18, 0x7); bnx2_read_phy(bp, 0x18, &val); @@ -5007,12 +5007,12 @@ bnx2_init_chip(struct bnx2 *bp) /* Program the MTU. Also include 4 bytes for CRC32. */ mtu = bp->dev->mtu; val = mtu + ETH_HLEN + ETH_FCS_LEN; - if (val > (MAX_ETHERNET_PACKET_SIZE + 4)) + if (val > (MAX_ETHERNET_PACKET_SIZE + ETH_HLEN + 4)) val |= BNX2_EMAC_RX_MTU_SIZE_JUMBO_ENA; BNX2_WR(bp, BNX2_EMAC_RX_MTU_SIZE, val); - if (mtu < 1500) - mtu = 1500; + if (mtu < ETH_DATA_LEN) + mtu = ETH_DATA_LEN; bnx2_reg_wr_ind(bp, BNX2_RBUF_CONFIG, BNX2_RBUF_CONFIG_VAL(mtu)); bnx2_reg_wr_ind(bp, BNX2_RBUF_CONFIG2, BNX2_RBUF_CONFIG2_VAL(mtu)); @@ -7923,10 +7923,6 @@ bnx2_change_mtu(struct net_device *dev, int new_mtu) { struct bnx2 *bp = netdev_priv(dev); - if (((new_mtu + ETH_HLEN) > MAX_ETHERNET_JUMBO_PACKET_SIZE) || - ((new_mtu + ETH_HLEN) < MIN_ETHERNET_PACKET_SIZE)) - return -EINVAL; - dev->mtu = new_mtu; return bnx2_change_ring_size(bp, bp->rx_ring_size, bp->tx_ring_size, false); @@ -8619,6 +8615,8 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= dev->hw_features; dev->priv_flags |= IFF_UNICAST_FLT; + dev->min_mtu = MIN_ETHERNET_PACKET_SIZE; + dev->max_mtu = MAX_ETHERNET_JUMBO_PACKET_SIZE; if (!(bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)) dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index 380234d72b95..a09ec47461c9 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -6530,9 +6530,9 @@ struct l2_fhdr { #define MII_BNX2_AER_AER_AN_MMD 0x3800 #define MII_BNX2_BLK_ADDR_COMBO_IEEEB0 0xffe0 -#define MIN_ETHERNET_PACKET_SIZE 60 -#define MAX_ETHERNET_PACKET_SIZE 1514 -#define MAX_ETHERNET_JUMBO_PACKET_SIZE 9014 +#define MIN_ETHERNET_PACKET_SIZE (ETH_ZLEN - ETH_HLEN) +#define MAX_ETHERNET_PACKET_SIZE ETH_DATA_LEN +#define MAX_ETHERNET_JUMBO_PACKET_SIZE 9000 #define BNX2_RX_COPY_THRESH 128 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 7dd7490fdac1..0a23034bbe3f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1396,9 +1396,9 @@ struct bnx2x { int tx_ring_size; /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */ -#define ETH_OVREHEAD (ETH_HLEN + 8 + 8) -#define ETH_MIN_PACKET_SIZE 60 -#define ETH_MAX_PACKET_SIZE 1500 +#define ETH_OVERHEAD (ETH_HLEN + 8 + 8) +#define ETH_MIN_PACKET_SIZE (ETH_ZLEN - ETH_HLEN) +#define ETH_MAX_PACKET_SIZE ETH_DATA_LEN #define ETH_MAX_JUMBO_PACKET_SIZE 9600 /* TCP with Timestamp Option (32) + IPv6 (40) */ #define ETH_MAX_TPA_HEADER_SIZE 72 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 0a9108cd4c45..ed42c1009685 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2023,7 +2023,7 @@ static void bnx2x_set_rx_buf_size(struct bnx2x *bp) mtu = bp->dev->mtu; fp->rx_buf_size = BNX2X_FW_RX_ALIGN_START + IP_HEADER_ALIGNMENT_PADDING + - ETH_OVREHEAD + + ETH_OVERHEAD + mtu + BNX2X_FW_RX_ALIGN_END; /* Note : rx_buf_size doesn't take into account NET_SKB_PAD */ @@ -4855,12 +4855,6 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu) return -EAGAIN; } - if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) || - ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE)) { - BNX2X_ERR("Can't support requested MTU size\n"); - return -EINVAL; - } - /* This does not race with packet allocation * because the actual alloc size is * only updated as part of load diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 1fb80100e5e7..05356efdbf93 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -34,12 +34,6 @@ typedef int (*read_sfp_module_eeprom_func_p)(struct bnx2x_phy *phy, u8 dev_addr, u16 addr, u8 byte_cnt, u8 *o_buf, u8); /********************************************************/ -#define ETH_HLEN 14 -/* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */ -#define ETH_OVREHEAD (ETH_HLEN + 8 + 8) -#define ETH_MIN_PACKET_SIZE 60 -#define ETH_MAX_PACKET_SIZE 1500 -#define ETH_MAX_JUMBO_PACKET_SIZE 9600 #define MDIO_ACCESS_TIMEOUT 1000 #define WC_LANE_MAX 4 #define I2C_SWITCH_WIDTH 2 @@ -1917,7 +1911,7 @@ static int bnx2x_emac_enable(struct link_params *params, /* Enable emac for jumbo packets */ EMAC_WR(bp, EMAC_REG_EMAC_RX_MTU_SIZE, (EMAC_RX_MTU_SIZE_JUMBO_ENA | - (ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD))); + (ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD))); /* Strip CRC */ REG_WR(bp, NIG_REG_NIG_INGRESS_EMAC0_NO_CRC + port*4, 0x1); @@ -2314,19 +2308,19 @@ static int bnx2x_bmac1_enable(struct link_params *params, REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_BMAC_CONTROL, wb_data, 2); /* Set rx mtu */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_MAX_SIZE, wb_data, 2); bnx2x_update_pfc_bmac1(params, vars); /* Set tx mtu */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_TX_MAX_SIZE, wb_data, 2); /* Set cnt max size */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_CNT_MAX_SIZE, wb_data, 2); @@ -2384,18 +2378,18 @@ static int bnx2x_bmac2_enable(struct link_params *params, udelay(30); /* Set RX MTU */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_RX_MAX_SIZE, wb_data, 2); udelay(30); /* Set TX MTU */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_TX_MAX_SIZE, wb_data, 2); udelay(30); /* Set cnt max size */ - wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD - 2; + wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD - 2; wb_data[1] = 0; REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_CNT_MAX_SIZE, wb_data, 2); udelay(30); @@ -2516,7 +2510,7 @@ static int bnx2x_pbf_update(struct link_params *params, u32 flow_ctrl, } else { u32 thresh = (ETH_MAX_JUMBO_PACKET_SIZE + - ETH_OVREHEAD)/16; + ETH_OVERHEAD)/16; REG_WR(bp, PBF_REG_P0_PAUSE_ENABLE + port*4, 0); /* Update threshold */ REG_WR(bp, PBF_REG_P0_ARB_THRSH + port*4, thresh); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0cee4c0283f9..ab990da677d5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12080,8 +12080,7 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp) mtu_size, mtu); /* if valid: update device mtu */ - if (((mtu_size + ETH_HLEN) >= - ETH_MIN_PACKET_SIZE) && + if ((mtu_size >= ETH_MIN_PACKET_SIZE) && (mtu_size <= ETH_MAX_JUMBO_PACKET_SIZE)) bp->dev->mtu = mtu_size; @@ -13315,6 +13314,10 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->dcbnl_ops = &bnx2x_dcbnl_ops; #endif + /* MTU range, 46 - 9600 */ + dev->min_mtu = ETH_MIN_PACKET_SIZE; + dev->max_mtu = ETH_MAX_JUMBO_PACKET_SIZE; + /* get_port_hwinfo() will set prtad and mmds properly */ bp->mdio.prtad = MDIO_PRTAD_NONE; bp->mdio.mmds = 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c6909660e097..27a2dd917643 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4954,7 +4954,6 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[cp_nr_rings]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll_nitroa0, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; @@ -6290,9 +6289,6 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) { struct bnxt *bp = netdev_priv(dev); - if (new_mtu < 60 || new_mtu > 9500) - return -EINVAL; - if (netif_running(dev)) bnxt_close_nic(bp, false, false); @@ -6871,6 +6867,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features |= dev->hw_features | NETIF_F_HIGHDMA; dev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 60 - 9500 */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = 9500; + #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); #endif diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index f1b81187a201..435a2e4739d1 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2147,15 +2147,6 @@ static void sbmac_setmulti(struct sbmac_softc *sc) } } -static int sb1250_change_mtu(struct net_device *_dev, int new_mtu) -{ - if (new_mtu > ENET_PACKET_SIZE) - return -EINVAL; - _dev->mtu = new_mtu; - pr_info("changing the mtu to %d\n", new_mtu); - return 0; -} - static const struct net_device_ops sbmac_netdev_ops = { .ndo_open = sbmac_open, .ndo_stop = sbmac_close, @@ -2163,7 +2154,6 @@ static const struct net_device_ops sbmac_netdev_ops = { .ndo_set_rx_mode = sbmac_set_rx_mode, .ndo_tx_timeout = sbmac_tx_timeout, .ndo_do_ioctl = sbmac_mii_ioctl, - .ndo_change_mtu = sb1250_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2229,6 +2219,8 @@ static int sbmac_init(struct platform_device *pldev, long long base) dev->netdev_ops = &sbmac_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; + dev->min_mtu = 0; + dev->max_mtu = ENET_PACKET_SIZE; netif_napi_add(dev, &sc->napi, sbmac_poll, 16); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a927a730da10..185e9e047aa9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -124,7 +124,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define TG3_TX_TIMEOUT (5 * HZ) /* hardware minimum and maximum for a single frame's data payload */ -#define TG3_MIN_MTU 60 +#define TG3_MIN_MTU ETH_ZLEN #define TG3_MAX_MTU(tp) \ (tg3_flag(tp, JUMBO_CAPABLE) ? 9000 : 1500) @@ -14199,9 +14199,6 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) int err; bool reset_phy = false; - if (new_mtu < TG3_MIN_MTU || new_mtu > TG3_MAX_MTU(tp)) - return -EINVAL; - if (!netif_running(dev)) { /* We'll just catch it later when the * device is up'd. @@ -17799,6 +17796,10 @@ static int tg3_init_one(struct pci_dev *pdev, dev->hw_features |= features; dev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 60 - 9000 or 1500, depending on hardware */ + dev->min_mtu = TG3_MIN_MTU; + dev->max_mtu = TG3_MAX_MTU(tp); + if (tg3_chip_rev_id(tp) == CHIPREV_ID_5705_A1 && !tg3_flag(tp, TSO_CAPABLE) && !(tr32(TG3PCI_PCISTATE) & PCISTATE_BUS_SPEED_HIGH)) { diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index f42f672b0e7e..112030828c4b 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3296,9 +3296,6 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) struct bnad *bnad = netdev_priv(netdev); u32 rx_count = 0, frame, new_frame; - if (new_mtu + ETH_HLEN < ETH_ZLEN || new_mtu > BNAD_JUMBO_MTU) - return -EINVAL; - mutex_lock(&bnad->conf_mutex); mtu = netdev->mtu; @@ -3465,6 +3462,10 @@ bnad_netdev_init(struct bnad *bnad, bool using_dac) netdev->mem_start = bnad->mmio_start; netdev->mem_end = bnad->mmio_start + bnad->mmio_len - 1; + /* MTU range: 46 - 9000 */ + netdev->min_mtu = ETH_ZLEN - ETH_HLEN; + netdev->max_mtu = BNAD_JUMBO_MTU; + netdev->netdev_ops = &bnad_netdev_ops; bnad_set_ethtool_ops(netdev); } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index b32444a3ed79..e1847ce6308d 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -37,14 +37,21 @@ #define MACB_RX_BUFFER_SIZE 128 #define RX_BUFFER_MULTIPLE 64 /* bytes */ -#define RX_RING_SIZE 512 /* must be power of 2 */ -#define RX_RING_BYTES (sizeof(struct macb_dma_desc) * RX_RING_SIZE) -#define TX_RING_SIZE 128 /* must be power of 2 */ -#define TX_RING_BYTES (sizeof(struct macb_dma_desc) * TX_RING_SIZE) +#define DEFAULT_RX_RING_SIZE 512 /* must be power of 2 */ +#define MIN_RX_RING_SIZE 64 +#define MAX_RX_RING_SIZE 8192 +#define RX_RING_BYTES(bp) (sizeof(struct macb_dma_desc) \ + * (bp)->rx_ring_size) + +#define DEFAULT_TX_RING_SIZE 512 /* must be power of 2 */ +#define MIN_TX_RING_SIZE 64 +#define MAX_TX_RING_SIZE 4096 +#define TX_RING_BYTES(bp) (sizeof(struct macb_dma_desc) \ + * (bp)->tx_ring_size) /* level of occupied TX descriptors under which we wake up TX process */ -#define MACB_TX_WAKEUP_THRESH (3 * TX_RING_SIZE / 4) +#define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) #define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ | MACB_BIT(ISR_ROVR)) @@ -56,7 +63,7 @@ #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1)) #define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1)) -#define GEM_MTU_MIN_SIZE 68 +#define GEM_MTU_MIN_SIZE ETH_MIN_MTU #define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0) #define MACB_WOL_ENABLED (0x1 << 1) @@ -67,45 +74,47 @@ #define MACB_HALT_TIMEOUT 1230 /* Ring buffer accessors */ -static unsigned int macb_tx_ring_wrap(unsigned int index) +static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index) { - return index & (TX_RING_SIZE - 1); + return index & (bp->tx_ring_size - 1); } static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue, unsigned int index) { - return &queue->tx_ring[macb_tx_ring_wrap(index)]; + return &queue->tx_ring[macb_tx_ring_wrap(queue->bp, index)]; } static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue, unsigned int index) { - return &queue->tx_skb[macb_tx_ring_wrap(index)]; + return &queue->tx_skb[macb_tx_ring_wrap(queue->bp, index)]; } static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index) { dma_addr_t offset; - offset = macb_tx_ring_wrap(index) * sizeof(struct macb_dma_desc); + offset = macb_tx_ring_wrap(queue->bp, index) * + sizeof(struct macb_dma_desc); return queue->tx_ring_dma + offset; } -static unsigned int macb_rx_ring_wrap(unsigned int index) +static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index) { - return index & (RX_RING_SIZE - 1); + return index & (bp->rx_ring_size - 1); } static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) { - return &bp->rx_ring[macb_rx_ring_wrap(index)]; + return &bp->rx_ring[macb_rx_ring_wrap(bp, index)]; } static void *macb_rx_buffer(struct macb *bp, unsigned int index) { - return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index); + return bp->rx_buffers + bp->rx_buffer_size * + macb_rx_ring_wrap(bp, index); } /* I/O accessors */ @@ -608,7 +617,8 @@ static void macb_tx_error_task(struct work_struct *work) */ if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) { netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n", - macb_tx_ring_wrap(tail), skb->data); + macb_tx_ring_wrap(bp, tail), + skb->data); bp->stats.tx_packets++; bp->stats.tx_bytes += skb->len; } @@ -700,7 +710,8 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n", - macb_tx_ring_wrap(tail), skb->data); + macb_tx_ring_wrap(bp, tail), + skb->data); bp->stats.tx_packets++; bp->stats.tx_bytes += skb->len; } @@ -720,7 +731,7 @@ static void macb_tx_interrupt(struct macb_queue *queue) queue->tx_tail = tail; if (__netif_subqueue_stopped(bp->dev, queue_index) && CIRC_CNT(queue->tx_head, queue->tx_tail, - TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH) + bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp)) netif_wake_subqueue(bp->dev, queue_index); } @@ -731,8 +742,8 @@ static void gem_rx_refill(struct macb *bp) dma_addr_t paddr; while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, - RX_RING_SIZE) > 0) { - entry = macb_rx_ring_wrap(bp->rx_prepared_head); + bp->rx_ring_size) > 0) { + entry = macb_rx_ring_wrap(bp, bp->rx_prepared_head); /* Make hw descriptor updates visible to CPU */ rmb(); @@ -759,7 +770,7 @@ static void gem_rx_refill(struct macb *bp) bp->rx_skbuff[entry] = skb; - if (entry == RX_RING_SIZE - 1) + if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); macb_set_addr(&(bp->rx_ring[entry]), paddr); bp->rx_ring[entry].ctrl = 0; @@ -813,7 +824,7 @@ static int gem_rx(struct macb *bp, int budget) dma_addr_t addr; bool rxused; - entry = macb_rx_ring_wrap(bp->rx_tail); + entry = macb_rx_ring_wrap(bp, bp->rx_tail); desc = &bp->rx_ring[entry]; /* Make hw descriptor updates visible to CPU */ @@ -895,8 +906,8 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, len = desc->ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", - macb_rx_ring_wrap(first_frag), - macb_rx_ring_wrap(last_frag), len); + macb_rx_ring_wrap(bp, first_frag), + macb_rx_ring_wrap(bp, last_frag), len); /* The ethernet header starts NET_IP_ALIGN bytes into the * first buffer. Since the header is 14 bytes, this makes the @@ -969,12 +980,12 @@ static inline void macb_init_rx_ring(struct macb *bp) int i; addr = bp->rx_buffers_dma; - for (i = 0; i < RX_RING_SIZE; i++) { + for (i = 0; i < bp->rx_ring_size; i++) { bp->rx_ring[i].addr = addr; bp->rx_ring[i].ctrl = 0; addr += bp->rx_buffer_size; } - bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); + bp->rx_ring[bp->rx_ring_size - 1].addr |= MACB_BIT(RX_WRAP); } static int macb_rx(struct macb *bp, int budget) @@ -1228,7 +1239,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) { size = min(len, bp->max_tx_length); - entry = macb_tx_ring_wrap(tx_head); + entry = macb_tx_ring_wrap(bp, tx_head); tx_skb = &queue->tx_skb[entry]; mapping = dma_map_single(&bp->pdev->dev, @@ -1257,7 +1268,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) { size = min(len, bp->max_tx_length); - entry = macb_tx_ring_wrap(tx_head); + entry = macb_tx_ring_wrap(bp, tx_head); tx_skb = &queue->tx_skb[entry]; mapping = skb_frag_dma_map(&bp->pdev->dev, frag, @@ -1295,14 +1306,14 @@ static unsigned int macb_tx_map(struct macb *bp, * to set the end of TX queue */ i = tx_head; - entry = macb_tx_ring_wrap(i); + entry = macb_tx_ring_wrap(bp, i); ctrl = MACB_BIT(TX_USED); desc = &queue->tx_ring[entry]; desc->ctrl = ctrl; do { i--; - entry = macb_tx_ring_wrap(i); + entry = macb_tx_ring_wrap(bp, i); tx_skb = &queue->tx_skb[entry]; desc = &queue->tx_ring[entry]; @@ -1311,7 +1322,7 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BIT(TX_LAST); eof = 0; } - if (unlikely(entry == (TX_RING_SIZE - 1))) + if (unlikely(entry == (bp->tx_ring_size - 1))) ctrl |= MACB_BIT(TX_WRAP); /* Set TX buffer descriptor */ @@ -1388,7 +1399,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&bp->lock, flags); /* This is a hard error, log it. */ - if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < count) { + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, + bp->tx_ring_size) < count) { netif_stop_subqueue(dev, queue_index); spin_unlock_irqrestore(&bp->lock, flags); netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n", @@ -1414,7 +1426,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < 1) + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) netif_stop_subqueue(dev, queue_index); unlock: @@ -1453,7 +1465,7 @@ static void gem_free_rx_buffers(struct macb *bp) if (!bp->rx_skbuff) return; - for (i = 0; i < RX_RING_SIZE; i++) { + for (i = 0; i < bp->rx_ring_size; i++) { skb = bp->rx_skbuff[i]; if (!skb) @@ -1478,7 +1490,7 @@ static void macb_free_rx_buffers(struct macb *bp) { if (bp->rx_buffers) { dma_free_coherent(&bp->pdev->dev, - RX_RING_SIZE * bp->rx_buffer_size, + bp->rx_ring_size * bp->rx_buffer_size, bp->rx_buffers, bp->rx_buffers_dma); bp->rx_buffers = NULL; } @@ -1491,7 +1503,7 @@ static void macb_free_consistent(struct macb *bp) bp->macbgem_ops.mog_free_rx_buffers(bp); if (bp->rx_ring) { - dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES, + dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES(bp), bp->rx_ring, bp->rx_ring_dma); bp->rx_ring = NULL; } @@ -1500,7 +1512,7 @@ static void macb_free_consistent(struct macb *bp) kfree(queue->tx_skb); queue->tx_skb = NULL; if (queue->tx_ring) { - dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES, + dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES(bp), queue->tx_ring, queue->tx_ring_dma); queue->tx_ring = NULL; } @@ -1511,14 +1523,14 @@ static int gem_alloc_rx_buffers(struct macb *bp) { int size; - size = RX_RING_SIZE * sizeof(struct sk_buff *); + size = bp->rx_ring_size * sizeof(struct sk_buff *); bp->rx_skbuff = kzalloc(size, GFP_KERNEL); if (!bp->rx_skbuff) return -ENOMEM; - - netdev_dbg(bp->dev, - "Allocated %d RX struct sk_buff entries at %p\n", - RX_RING_SIZE, bp->rx_skbuff); + else + netdev_dbg(bp->dev, + "Allocated %d RX struct sk_buff entries at %p\n", + bp->rx_ring_size, bp->rx_skbuff); return 0; } @@ -1526,7 +1538,7 @@ static int macb_alloc_rx_buffers(struct macb *bp) { int size; - size = RX_RING_SIZE * bp->rx_buffer_size; + size = bp->rx_ring_size * bp->rx_buffer_size; bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size, &bp->rx_buffers_dma, GFP_KERNEL); if (!bp->rx_buffers) @@ -1545,7 +1557,7 @@ static int macb_alloc_consistent(struct macb *bp) int size; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - size = TX_RING_BYTES; + size = TX_RING_BYTES(bp); queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size, &queue->tx_ring_dma, GFP_KERNEL); @@ -1556,13 +1568,13 @@ static int macb_alloc_consistent(struct macb *bp) q, size, (unsigned long)queue->tx_ring_dma, queue->tx_ring); - size = TX_RING_SIZE * sizeof(struct macb_tx_skb); + size = bp->tx_ring_size * sizeof(struct macb_tx_skb); queue->tx_skb = kmalloc(size, GFP_KERNEL); if (!queue->tx_skb) goto out_err; } - size = RX_RING_BYTES; + size = RX_RING_BYTES(bp); bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, &bp->rx_ring_dma, GFP_KERNEL); if (!bp->rx_ring) @@ -1588,11 +1600,11 @@ static void gem_init_rings(struct macb *bp) int i; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - for (i = 0; i < TX_RING_SIZE; i++) { - macb_set_addr(&(queue->tx_ring[i]), 0); + for (i = 0; i < bp->tx_ring_size; i++) { + queue->tx_ring[i].addr = 0; queue->tx_ring[i].ctrl = MACB_BIT(TX_USED); } - queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); + queue->tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP); queue->tx_head = 0; queue->tx_tail = 0; } @@ -1609,13 +1621,13 @@ static void macb_init_rings(struct macb *bp) macb_init_rx_ring(bp); - for (i = 0; i < TX_RING_SIZE; i++) { + for (i = 0; i < bp->tx_ring_size; i++) { bp->queues[0].tx_ring[i].addr = 0; bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED); } bp->queues[0].tx_head = 0; bp->queues[0].tx_tail = 0; - bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); + bp->queues[0].tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP); bp->rx_tail = 0; } @@ -1986,19 +1998,9 @@ static int macb_close(struct net_device *dev) static int macb_change_mtu(struct net_device *dev, int new_mtu) { - struct macb *bp = netdev_priv(dev); - u32 max_mtu; - if (netif_running(dev)) return -EBUSY; - max_mtu = ETH_DATA_LEN; - if (bp->caps & MACB_CAPS_JUMBO) - max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN; - - if ((new_mtu > max_mtu) || (new_mtu < GEM_MTU_MIN_SIZE)) - return -EINVAL; - dev->mtu = new_mtu; return 0; @@ -2158,8 +2160,8 @@ static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs, regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1)) | MACB_GREGS_VERSION; - tail = macb_tx_ring_wrap(bp->queues[0].tx_tail); - head = macb_tx_ring_wrap(bp->queues[0].tx_head); + tail = macb_tx_ring_wrap(bp, bp->queues[0].tx_tail); + head = macb_tx_ring_wrap(bp, bp->queues[0].tx_head); regs_buff[0] = macb_readl(bp, NCR); regs_buff[1] = macb_or_gem_readl(bp, NCFGR); @@ -2214,6 +2216,56 @@ static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } +static void macb_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct macb *bp = netdev_priv(netdev); + + ring->rx_max_pending = MAX_RX_RING_SIZE; + ring->tx_max_pending = MAX_TX_RING_SIZE; + + ring->rx_pending = bp->rx_ring_size; + ring->tx_pending = bp->tx_ring_size; +} + +static int macb_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct macb *bp = netdev_priv(netdev); + u32 new_rx_size, new_tx_size; + unsigned int reset = 0; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_rx_size = clamp_t(u32, ring->rx_pending, + MIN_RX_RING_SIZE, MAX_RX_RING_SIZE); + new_rx_size = roundup_pow_of_two(new_rx_size); + + new_tx_size = clamp_t(u32, ring->tx_pending, + MIN_TX_RING_SIZE, MAX_TX_RING_SIZE); + new_tx_size = roundup_pow_of_two(new_tx_size); + + if ((new_tx_size == bp->tx_ring_size) && + (new_rx_size == bp->rx_ring_size)) { + /* nothing to do */ + return 0; + } + + if (netif_running(bp->dev)) { + reset = 1; + macb_close(bp->dev); + } + + bp->rx_ring_size = new_rx_size; + bp->tx_ring_size = new_tx_size; + + if (reset) + macb_open(bp->dev); + + return 0; +} + static const struct ethtool_ops macb_ethtool_ops = { .get_regs_len = macb_get_regs_len, .get_regs = macb_get_regs, @@ -2223,6 +2275,8 @@ static const struct ethtool_ops macb_ethtool_ops = { .set_wol = macb_set_wol, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = macb_get_ringparam, + .set_ringparam = macb_set_ringparam, }; static const struct ethtool_ops gem_ethtool_ops = { @@ -2235,6 +2289,8 @@ static const struct ethtool_ops gem_ethtool_ops = { .get_sset_count = gem_get_sset_count, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = macb_get_ringparam, + .set_ringparam = macb_set_ringparam, }; static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -2429,6 +2485,9 @@ static int macb_init(struct platform_device *pdev) int err; u32 val; + bp->tx_ring_size = DEFAULT_TX_RING_SIZE; + bp->rx_ring_size = DEFAULT_RX_RING_SIZE; + /* set the queue register mapping once for all: queue0 has a special * register mapping but we don't want to test the queue index then * compute the corresponding register offset at run time. @@ -2793,7 +2852,6 @@ static const struct net_device_ops at91ether_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_do_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = at91ether_poll_controller, #endif @@ -3028,6 +3086,13 @@ static int macb_probe(struct platform_device *pdev) goto err_out_free_netdev; } + /* MTU range: 68 - 1500 or 10240 */ + dev->min_mtu = GEM_MTU_MIN_SIZE; + if (bp->caps & MACB_CAPS_JUMBO) + dev->max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN; + else + dev->max_mtu = ETH_DATA_LEN; + mac = of_get_mac_address(np); if (mac) ether_addr_copy(bp->dev->dev_addr, mac); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 8bed4b52fef5..1216950c97d1 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -811,6 +811,9 @@ struct macb { void *rx_buffers; size_t rx_buffer_size; + unsigned int rx_ring_size; + unsigned int tx_ring_size; + unsigned int num_queues; unsigned int queue_mask; struct macb_queue queues[MACB_MAX_QUEUES]; diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 63efa0dc45ba..6e723662ee69 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -394,7 +394,7 @@ struct xgmac_priv { }; /* XGMAC Configuration Settings */ -#define MAX_MTU 9000 +#define XGMAC_MAX_MTU 9000 #define PAUSE_TIME 0x400 #define DMA_RX_RING_SZ 256 @@ -1360,20 +1360,6 @@ out: */ static int xgmac_change_mtu(struct net_device *dev, int new_mtu) { - struct xgmac_priv *priv = netdev_priv(dev); - int old_mtu; - - if ((new_mtu < 46) || (new_mtu > MAX_MTU)) { - netdev_err(priv->dev, "invalid MTU, max MTU is: %d\n", MAX_MTU); - return -EINVAL; - } - - old_mtu = dev->mtu; - - /* return early if the buffer sizes will not change */ - if (old_mtu == new_mtu) - return 0; - /* Stop everything, get ready to change the MTU */ if (!netif_running(dev)) return 0; @@ -1804,6 +1790,10 @@ static int xgmac_probe(struct platform_device *pdev) ndev->features |= ndev->hw_features; ndev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 46 - 9000 */ + ndev->min_mtu = ETH_ZLEN - ETH_HLEN; + ndev->max_mtu = XGMAC_MAX_MTU; + /* Get the MAC address */ xgmac_get_mac_addr(priv->base, ndev->dev_addr, 0); if (!is_valid_ether_addr(ndev->dev_addr)) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index afc6f9dc8119..71d01a77896d 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2868,17 +2868,6 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu) struct octnic_ctrl_pkt nctrl; int ret = 0; - /* Limit the MTU to make sure the ethernet packets are between 68 bytes - * and 16000 bytes - */ - if ((new_mtu < LIO_MIN_MTU_SIZE) || - (new_mtu > LIO_MAX_MTU_SIZE)) { - dev_err(&oct->pci_dev->dev, "Invalid MTU: %d\n", new_mtu); - dev_err(&oct->pci_dev->dev, "Valid range %d and %d\n", - LIO_MIN_MTU_SIZE, LIO_MAX_MTU_SIZE); - return -EINVAL; - } - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); nctrl.ncmd.u64 = 0; @@ -3891,6 +3880,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) netdev->hw_features = netdev->hw_features & ~NETIF_F_HW_VLAN_CTAG_RX; + /* MTU range: 68 - 16000 */ + netdev->min_mtu = LIO_MIN_MTU_SIZE; + netdev->max_mtu = LIO_MAX_MTU_SIZE; + /* Point to the properties for octeon device to which this * interface belongs. */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index e5d1debd05ad..54b966596323 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -29,7 +29,7 @@ #include <linux/ptp_clock_kernel.h> #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) -#define LIO_MIN_MTU_SIZE 68 +#define LIO_MIN_MTU_SIZE ETH_MIN_MTU struct oct_nic_stats_resp { u64 rh; diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 4ab404f45b21..16e12c45904b 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -645,16 +645,6 @@ static int octeon_mgmt_change_mtu(struct net_device *netdev, int new_mtu) struct octeon_mgmt *p = netdev_priv(netdev); int size_without_fcs = new_mtu + OCTEON_MGMT_RX_HEADROOM; - /* Limit the MTU to make sure the ethernet packets are between - * 64 bytes and 16383 bytes. - */ - if (size_without_fcs < 64 || size_without_fcs > 16383) { - dev_warn(p->dev, "MTU must be between %d and %d.\n", - 64 - OCTEON_MGMT_RX_HEADROOM, - 16383 - OCTEON_MGMT_RX_HEADROOM); - return -EINVAL; - } - netdev->mtu = new_mtu; cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_MAX, size_without_fcs); @@ -1491,6 +1481,9 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->netdev_ops = &octeon_mgmt_ops; netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; + netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; + netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM; + mac = of_get_mac_address(pdev->dev.of_node); if (mac) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 45a13f718863..b192712c93b7 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1312,12 +1312,6 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - if (new_mtu > NIC_HW_MAX_FRS) - return -EINVAL; - - if (new_mtu < NIC_HW_MIN_FRS) - return -EINVAL; - if (nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; netdev->mtu = new_mtu; @@ -1630,6 +1624,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; + /* MTU range: 64 - 9200 */ + netdev->min_mtu = NIC_HW_MIN_FRS; + netdev->max_mtu = NIC_HW_MAX_FRS; + INIT_WORK(&nic->reset_task, nicvf_reset_task); err = register_netdev(netdev); diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h index 53b1f9478383..6916c62f2487 100644 --- a/drivers/net/ethernet/chelsio/cxgb/common.h +++ b/drivers/net/ethernet/chelsio/cxgb/common.h @@ -85,6 +85,11 @@ struct t1_rx_mode { #define SPEED_INVALID 0xffff #define DUPLEX_INVALID 0xff +/* Max frame size PM3393 can handle. Includes Ethernet header and CRC. */ +#define PM3393_MAX_FRAME_SIZE 9600 + +#define VSC7326_MAX_MTU 9600 + enum { CHBT_BOARD_N110, CHBT_BOARD_N210, diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index f5f1b0b51ebd..81d1d0bc7553 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -825,8 +825,6 @@ static int t1_change_mtu(struct net_device *dev, int new_mtu) if (!mac->ops->set_mtu) return -EOPNOTSUPP; - if (new_mtu < 68) - return -EINVAL; if ((ret = mac->ops->set_mtu(mac, new_mtu))) return ret; dev->mtu = new_mtu; @@ -1101,6 +1099,22 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(netdev, &adapter->napi, t1_poll, 64); netdev->ethtool_ops = &t1_ethtool_ops; + + switch (bi->board) { + case CHBT_BOARD_CHT110: + case CHBT_BOARD_N110: + case CHBT_BOARD_N210: + case CHBT_BOARD_CHT210: + netdev->max_mtu = PM3393_MAX_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; + case CHBT_BOARD_CHN204: + netdev->max_mtu = VSC7326_MAX_MTU; + break; + default: + netdev->max_mtu = ETH_DATA_LEN; + break; + } } if (t1_init_sw_modules(adapter, bi) < 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c index eb462d7db427..c27908e66f5e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c +++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c @@ -47,9 +47,6 @@ #define OFFSET(REG_ADDR) ((REG_ADDR) << 2) -/* Max frame size PM3393 can handle. Includes Ethernet header and CRC. */ -#define MAX_FRAME_SIZE 9600 - #define IPG 12 #define TXXG_CONF1_VAL ((IPG << SUNI1x10GEXP_BITOFF_TXXG_IPGT) | \ SUNI1x10GEXP_BITMSK_TXXG_32BIT_ALIGN | SUNI1x10GEXP_BITMSK_TXXG_CRCEN | \ @@ -331,10 +328,7 @@ static int pm3393_set_mtu(struct cmac *cmac, int mtu) { int enabled = cmac->instance->enabled; - /* MAX_FRAME_SIZE includes header + FCS, mtu doesn't */ - mtu += 14 + 4; - if (mtu > MAX_FRAME_SIZE) - return -EINVAL; + mtu += ETH_HLEN + ETH_FCS_LEN; /* Disable Rx/Tx MAC before configuring it. */ if (enabled) diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c index 6f30b6f78553..bdc895bd2a46 100644 --- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c +++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c @@ -11,8 +11,6 @@ /* 30 minutes for full statistics update */ #define MAJOR_UPDATE_TICKS (1800 / STATS_TICK_SECS) -#define MAX_MTU 9600 - /* The egress WM value 0x01a01fff should be used only when the * interface is down (MAC port disabled). This is a workaround * for disabling the T2/MAC flow-control. When the interface is @@ -452,9 +450,6 @@ static int mac_set_mtu(struct cmac *mac, int mtu) { int port = mac->instance->index; - if (mtu > MAX_MTU) - return -EINVAL; - /* max_len includes header and FCS */ vsc_write(mac->adapter, REG_MAX_LEN(port), mtu + 14 + 4); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 43da891fab97..092b3c16440b 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2531,8 +2531,6 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) struct adapter *adapter = pi->adapter; int ret; - if (new_mtu < 81) /* accommodate SACK */ - return -EINVAL; if ((ret = t3_mac_set_mtu(&pi->mac, new_mtu))) return ret; dev->mtu = new_mtu; @@ -3295,6 +3293,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &cxgb_netdev_ops; netdev->ethtool_ops = &cxgb_ethtool_ops; + netdev->min_mtu = 81; + netdev->max_mtu = ETH_MAX_MTU; } pci_set_drvdata(pdev, adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 57eb4e1345cb..c0cc2ee77be7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2502,8 +2502,6 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) int ret; struct port_info *pi = netdev_priv(dev); - if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */ - return -EINVAL; ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, new_mtu, -1, -1, -1, -1, true); if (!ret) @@ -4803,6 +4801,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 81 - 9600 */ + netdev->min_mtu = 81; + netdev->max_mtu = MAX_MTU; + netdev->netdev_ops = &cxgb4_netdev_ops; #ifdef CONFIG_CHELSIO_T4_DCB netdev->dcbnl_ops = &cxgb4_dcb_ops; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 49d2debb334e..52af62e0ecb6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -113,7 +113,7 @@ static int fill_action_fields(struct adapter *adap, } /* Re-direct to specified port in hardware. */ - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { struct net_device *n_dev; unsigned int i, index; bool found = false; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 100b2cc064a3..5d4da0e8acaa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1108,10 +1108,6 @@ static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu) int ret; struct port_info *pi = netdev_priv(dev); - /* accommodate SACK */ - if (new_mtu < 81) - return -EINVAL; - ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu, -1, -1, -1, -1, true); if (!ret) @@ -2966,6 +2962,8 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->min_mtu = 81; + netdev->max_mtu = ETH_MAX_MTU; netdev->netdev_ops = &cxgb4vf_netdev_ops; netdev->ethtool_ops = &cxgb4vf_ethtool_ops; diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index c363b58552e9..3647b28e8de0 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1266,7 +1266,6 @@ static const struct net_device_ops net_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = net_poll_controller, #endif - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index de9f7c97d916..9119af088821 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -749,7 +749,6 @@ static const struct net_device_ops ep93xx_netdev_ops = { .ndo_start_xmit = ep93xx_xmit, .ndo_do_ioctl = ep93xx_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c index 07719676c305..b600fbbbf679 100644 --- a/drivers/net/ethernet/cirrus/mac89x0.c +++ b/drivers/net/ethernet/cirrus/mac89x0.c @@ -172,7 +172,6 @@ static const struct net_device_ops mac89x0_netdev_ops = { .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; /* Probe for the CS8900 card in slot E. We won't bother looking diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 130f910e4785..9023c858715d 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -33,7 +33,7 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" -#define DRV_VERSION "2.3.0.20" +#define DRV_VERSION "2.3.0.31" #define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc" #define ENIC_BARS_MAX 6 diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 48f82ab6c25b..cdd7a1a59aa7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1166,12 +1166,18 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, q_number); if (netdev->features & NETIF_F_RXHASH) { - skb_set_hash(skb, rss_hash, - (rss_type & - (NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV4)) ? - PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); + switch (rss_type) { + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); + break; + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); + break; + } } /* Hardware does not provide whole packet checksum. It only @@ -1843,9 +1849,6 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) struct enic *enic = netdev_priv(netdev); int running = netif_running(netdev); - if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU) - return -EINVAL; - if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; @@ -2751,6 +2754,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 68 - 9000 */ + netdev->min_mtu = ENIC_MIN_MTU; + netdev->max_mtu = ENIC_MAX_MTU; + err = register_netdev(netdev); if (err) { dev_err(dev, "Cannot register net device, aborting\n"); diff --git a/drivers/net/ethernet/cisco/enic/enic_res.h b/drivers/net/ethernet/cisco/enic/enic_res.h index 69f60afd6577..81f98a8b60e9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.h +++ b/drivers/net/ethernet/cisco/enic/enic_res.h @@ -30,7 +30,7 @@ #define ENIC_MIN_RQ_DESCS 64 #define ENIC_MAX_RQ_DESCS 4096 -#define ENIC_MIN_MTU 68 +#define ENIC_MIN_MTU ETH_MIN_MTU #define ENIC_MAX_MTU 9000 #define ENIC_MULTICAST_PERFECT_FILTERS 32 diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index f45385f5c6e5..f1a81c52afe3 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1382,7 +1382,6 @@ static const struct net_device_ops dm9000_netdev_ops = { .ndo_tx_timeout = dm9000_timeout, .ndo_set_rx_mode = dm9000_hash_table, .ndo_do_ioctl = dm9000_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_features = dm9000_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index cadcee645f74..90c573b8ccaf 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1956,7 +1956,6 @@ static const struct net_device_ops de_netdev_ops = { .ndo_start_xmit = de_start_xmit, .ndo_get_stats = de_get_stats, .ndo_tx_timeout = de_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 6620fc861c47..51fda3a6b13f 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1085,7 +1085,6 @@ static const struct net_device_ops de4x5_netdev_ops = { .ndo_get_stats = de4x5_get_stats, .ndo_set_rx_mode = set_multicast_list, .ndo_do_ioctl = de4x5_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address= eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 8ed0fd8b1dda..df4994919456 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -352,7 +352,6 @@ static const struct net_device_ops netdev_ops = { .ndo_stop = dmfe_stop, .ndo_start_xmit = dmfe_start_xmit, .ndo_set_rx_mode = dmfe_set_filter_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index bbde90bc74fe..5f1377449b8f 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1282,7 +1282,6 @@ static const struct net_device_ops tulip_netdev_ops = { .ndo_get_stats = tulip_get_stats, .ndo_do_ioctl = private_ioctl, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index e750b5ddc0fb..e1c4133b8787 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -269,7 +269,6 @@ static const struct net_device_ops netdev_ops = { .ndo_stop = uli526x_stop, .ndo_start_xmit = uli526x_start_xmit, .ndo_set_rx_mode = uli526x_set_filter_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 1f62b9423851..feda96d585e7 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -353,7 +353,6 @@ static const struct net_device_ops netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_do_ioctl = netdev_ioctl, .ndo_tx_timeout = tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c index 0e721cedfa67..19e4ea15b504 100644 --- a/drivers/net/ethernet/dec/tulip/xircom_cb.c +++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c @@ -174,7 +174,6 @@ static const struct net_device_ops netdev_ops = { .ndo_open = xircom_open, .ndo_stop = xircom_close, .ndo_start_xmit = xircom_start_xmit, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 78f144696d6b..8c95a8a81e3c 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -76,7 +76,6 @@ static void rio_free_tx (struct net_device *dev, int irq); static void tx_error (struct net_device *dev, int tx_status); static int receive_packet (struct net_device *dev); static void rio_error (struct net_device *dev, int int_status); -static int change_mtu (struct net_device *dev, int new_mtu); static void set_multicast (struct net_device *dev); static struct net_device_stats *get_stats (struct net_device *dev); static int clear_stats (struct net_device *dev); @@ -106,7 +105,6 @@ static const struct net_device_ops netdev_ops = { .ndo_set_rx_mode = set_multicast, .ndo_do_ioctl = rio_ioctl, .ndo_tx_timeout = rio_tx_timeout, - .ndo_change_mtu = change_mtu, }; static int @@ -230,6 +228,10 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) #if 0 dev->features = NETIF_F_IP_CSUM; #endif + /* MTU range: 68 - 1536 or 8000 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = np->jumbo ? MAX_JUMBO : PACKET_SIZE; + pci_set_drvdata (pdev, dev); ring_space = pci_alloc_consistent (pdev, TX_TOTAL_SIZE, &ring_dma); @@ -1198,22 +1200,6 @@ clear_stats (struct net_device *dev) return 0; } - -static int -change_mtu (struct net_device *dev, int new_mtu) -{ - struct netdev_private *np = netdev_priv(dev); - int max = (np->jumbo) ? MAX_JUMBO : 1536; - - if ((new_mtu < 68) || (new_mtu > max)) { - return -EINVAL; - } - - dev->mtu = new_mtu; - - return 0; -} - static void set_multicast (struct net_device *dev) { diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 79d80090eac8..eab36acfc0d1 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -580,6 +580,10 @@ static int sundance_probe1(struct pci_dev *pdev, dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; + /* MTU range: 68 - 8191 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 8191; + pci_set_drvdata(pdev, dev); i = register_netdev(dev); @@ -713,8 +717,6 @@ err_out_netdev: static int change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || (new_mtu > 8191)) /* Set by RxDMAFrameLen */ - return -EINVAL; if (netif_running(dev)) return -EBUSY; dev->mtu = new_mtu; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index c3b64cdd0dec..2a17c59f69f9 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -767,7 +767,6 @@ static const struct net_device_ops dnet_netdev_ops = { .ndo_do_ioctl = dnet_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int dnet_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index f7b42483921c..57650953ff83 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -482,7 +482,6 @@ static const struct net_device_ops ec_bhf_netdev_ops = { .ndo_open = ec_bhf_open, .ndo_stop = ec_bhf_stop, .ndo_get_stats64 = ec_bhf_get_stats, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index cece8a08edca..3f6152cc648c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1406,23 +1406,6 @@ drop: return NETDEV_TX_OK; } -static int be_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct be_adapter *adapter = netdev_priv(netdev); - struct device *dev = &adapter->pdev->dev; - - if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) { - dev_info(dev, "MTU must be between %d and %d bytes\n", - BE_MIN_MTU, BE_MAX_MTU); - return -EINVAL; - } - - dev_info(dev, "MTU changed from %d to %d bytes\n", - netdev->mtu, new_mtu); - netdev->mtu = new_mtu; - return 0; -} - static inline bool be_in_all_promisc(struct be_adapter *adapter) { return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) == @@ -5216,7 +5199,6 @@ static const struct net_device_ops be_netdev_ops = { .ndo_start_xmit = be_xmit, .ndo_set_rx_mode = be_set_rx_mode, .ndo_set_mac_address = be_mac_addr_set, - .ndo_change_mtu = be_change_mtu, .ndo_get_stats64 = be_get_stats64, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = be_vlan_add_vid, @@ -5266,6 +5248,10 @@ static void be_netdev_init(struct net_device *netdev) netdev->netdev_ops = &be_netdev_ops; netdev->ethtool_ops = &be_ethtool_ops; + + /* MTU range: 256 - 9000 */ + netdev->min_mtu = BE_MIN_MTU; + netdev->max_mtu = BE_MAX_MTU; } static void be_cleanup(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c08bd763172a..6967b287b6e7 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -472,7 +472,6 @@ static const struct net_device_ops netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_do_ioctl = mii_ioctl, .ndo_tx_timeout = fealnx_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 5aa9d4ded214..01aebc0084a9 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1841,11 +1841,11 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - if (fep->clk_enet_out) { - ret = clk_prepare_enable(fep->clk_enet_out); - if (ret) - goto failed_clk_enet_out; - } + + ret = clk_prepare_enable(fep->clk_enet_out); + if (ret) + goto failed_clk_enet_out; + if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); ret = clk_prepare_enable(fep->clk_ptp); @@ -1857,23 +1857,20 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } mutex_unlock(&fep->ptp_clk_mutex); } - if (fep->clk_ref) { - ret = clk_prepare_enable(fep->clk_ref); - if (ret) - goto failed_clk_ref; - } + + ret = clk_prepare_enable(fep->clk_ref); + if (ret) + goto failed_clk_ref; } else { clk_disable_unprepare(fep->clk_ahb); - if (fep->clk_enet_out) - clk_disable_unprepare(fep->clk_enet_out); + clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); clk_disable_unprepare(fep->clk_ptp); fep->ptp_clk_on = false; mutex_unlock(&fep->ptp_clk_mutex); } - if (fep->clk_ref) - clk_disable_unprepare(fep->clk_ref); + clk_disable_unprepare(fep->clk_ref); } return 0; @@ -3055,7 +3052,6 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_stop = fec_enet_close, .ndo_start_xmit = fec_enet_start_xmit, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = fec_timeout, .ndo_set_mac_address = fec_set_mac_address, diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 446ae9d60c71..aa8cf5d2a53c 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -802,7 +802,6 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = { .ndo_set_mac_address = mpc52xx_fec_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = mpc52xx_fec_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mpc52xx_fec_tx_timeout, .ndo_get_stats = mpc52xx_fec_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 8fe6b3e253fa..cc5d07cfe535 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -879,13 +879,17 @@ static int mac_probe(struct platform_device *_of_dev) priv->fixed_link = kzalloc(sizeof(*priv->fixed_link), GFP_KERNEL); - if (!priv->fixed_link) + if (!priv->fixed_link) { + err = -ENOMEM; goto _return_dev_set_drvdata; + } priv->phy_node = of_node_get(mac_node); phy = of_phy_find_device(priv->phy_node); - if (!phy) + if (!phy) { + err = -EINVAL; goto _return_dev_set_drvdata; + } priv->fixed_link->link = phy->link; priv->fixed_link->speed = phy->speed; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index dc120c148d97..44f50e168703 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -118,22 +118,22 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { if (sc & BD_ENET_TX_HB) /* No heartbeat */ - fep->stats.tx_heartbeat_errors++; + dev->stats.tx_heartbeat_errors++; if (sc & BD_ENET_TX_LC) /* Late collision */ - fep->stats.tx_window_errors++; + dev->stats.tx_window_errors++; if (sc & BD_ENET_TX_RL) /* Retrans limit */ - fep->stats.tx_aborted_errors++; + dev->stats.tx_aborted_errors++; if (sc & BD_ENET_TX_UN) /* Underrun */ - fep->stats.tx_fifo_errors++; + dev->stats.tx_fifo_errors++; if (sc & BD_ENET_TX_CSL) /* Carrier lost */ - fep->stats.tx_carrier_errors++; + dev->stats.tx_carrier_errors++; if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { - fep->stats.tx_errors++; + dev->stats.tx_errors++; do_restart = 1; } } else - fep->stats.tx_packets++; + dev->stats.tx_packets++; if (sc & BD_ENET_TX_READY) { dev_warn(fep->dev, @@ -145,7 +145,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) * but we eventually sent the packet OK. */ if (sc & BD_ENET_TX_DEF) - fep->stats.collisions++; + dev->stats.collisions++; /* unmap */ if (fep->mapped_as_page[dirtyidx]) @@ -212,19 +212,19 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) */ if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL | BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) { - fep->stats.rx_errors++; + dev->stats.rx_errors++; /* Frame too long or too short. */ if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) - fep->stats.rx_length_errors++; + dev->stats.rx_length_errors++; /* Frame alignment */ if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL)) - fep->stats.rx_frame_errors++; + dev->stats.rx_frame_errors++; /* CRC Error */ if (sc & BD_ENET_RX_CR) - fep->stats.rx_crc_errors++; + dev->stats.rx_crc_errors++; /* FIFO overrun */ if (sc & BD_ENET_RX_OV) - fep->stats.rx_crc_errors++; + dev->stats.rx_crc_errors++; skbn = fep->rx_skbuff[curidx]; } else { @@ -233,9 +233,9 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) /* * Process the incoming frame. */ - fep->stats.rx_packets++; + dev->stats.rx_packets++; pkt_len = CBDR_DATLEN(bdp) - 4; /* remove CRC */ - fep->stats.rx_bytes += pkt_len + 4; + dev->stats.rx_bytes += pkt_len + 4; if (pkt_len <= fpi->rx_copybreak) { /* +2 to make IP header L1 cache aligned */ @@ -277,7 +277,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) received++; netif_receive_skb(skb); } else { - fep->stats.rx_dropped++; + dev->stats.rx_dropped++; skbn = skb; } } @@ -543,7 +543,7 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) curidx = bdp - fep->tx_bd_base; len = skb->len; - fep->stats.tx_bytes += len; + dev->stats.tx_bytes += len; if (nr_frags) len -= skb->data_len; fep->tx_free -= nr_frags + 1; @@ -619,7 +619,7 @@ static void fs_timeout(struct net_device *dev) unsigned long flags; int wake = 0; - fep->stats.tx_errors++; + dev->stats.tx_errors++; spin_lock_irqsave(&fep->lock, flags); @@ -774,12 +774,6 @@ static int fs_enet_close(struct net_device *dev) return 0; } -static struct net_device_stats *fs_enet_get_stats(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - return &fep->stats; -} - /*************************************************************************/ static void fs_get_drvinfo(struct net_device *dev, @@ -905,14 +899,12 @@ extern void fs_mii_disconnect(struct net_device *dev); static const struct net_device_ops fs_enet_netdev_ops = { .ndo_open = fs_enet_open, .ndo_stop = fs_enet_close, - .ndo_get_stats = fs_enet_get_stats, .ndo_start_xmit = fs_enet_start_xmit, .ndo_tx_timeout = fs_timeout, .ndo_set_rx_mode = fs_set_multicast_list, .ndo_do_ioctl = fs_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = fs_enet_netpoll, #endif diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index fee24c822fad..5ce516c8a62a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -137,7 +137,6 @@ struct fs_enet_private { cbd_t __iomem *cur_rx; cbd_t __iomem *cur_tx; int tx_free; - struct net_device_stats stats; struct timer_list phy_timer_list; const struct phy_info *phy; u32 msg_enable; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4b4f5bc0e279..409210413f5d 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1338,7 +1338,10 @@ static int gfar_probe(struct platform_device *ofdev) /* Fill in the dev structure */ dev->watchdog_timeo = TX_TIMEOUT; + /* MTU range: 50 - 9586 */ dev->mtu = 1500; + dev->min_mtu = 50; + dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN; dev->netdev_ops = &gfar_netdev_ops; dev->ethtool_ops = &gfar_ethtool_ops; @@ -2592,12 +2595,6 @@ static int gfar_set_mac_address(struct net_device *dev) static int gfar_change_mtu(struct net_device *dev, int new_mtu) { struct gfar_private *priv = netdev_priv(dev); - int frame_size = new_mtu + ETH_HLEN; - - if ((frame_size < 64) || (frame_size > GFAR_JUMBO_FRAME_SIZE)) { - netif_err(priv, drv, dev, "Invalid MTU setting\n"); - return -EINVAL; - } while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state)) cpu_relax(); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 186ef8f16c80..786182480a73 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3681,7 +3681,6 @@ static const struct net_device_ops ucc_geth_netdev_ops = { .ndo_start_xmit = ucc_geth_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ucc_geth_set_mac_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_rx_mode = ucc_geth_set_multi, .ndo_tx_timeout = ucc_geth_timeout, .ndo_do_ioctl = ucc_geth_ioctl, diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 399cfd217288..51c4abc51bf4 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -225,7 +225,6 @@ static const struct net_device_ops fjn_netdev_ops = { .ndo_tx_timeout = fjn_tx_timeout, .ndo_set_config = fjn_config, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 39778892b3b3..854befde0a08 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -769,7 +769,6 @@ static const struct net_device_ops hip04_netdev_ops = { .ndo_set_mac_address = hip04_set_mac_address, .ndo_tx_timeout = hip04_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int hip04_alloc_ring(struct net_device *ndev, struct device *d) @@ -898,7 +897,6 @@ static int hip04_mac_probe(struct platform_device *pdev) INIT_WORK(&priv->tx_timeout_task, hip04_tx_timeout_task); - ether_setup(ndev); ndev->netdev_ops = &hip04_netdev_ops; ndev->ethtool_ops = &hip04_ethtool_ops; ndev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index ced185962ef8..49863068c59e 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -712,7 +712,6 @@ static const struct net_device_ops hisi_femac_netdev_ops = { .ndo_do_ioctl = hisi_femac_net_ioctl, .ndo_set_mac_address = hisi_femac_set_mac_address, .ndo_set_rx_mode = hisi_femac_net_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, }; static void hisi_femac_core_reset(struct hisi_femac_priv *priv) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index e093cbf26c8c..09602f1187f5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -426,8 +426,14 @@ enum hnae_media_type { * get mac address * set_mac_addr() * set mac address + * clr_mc_addr() + * clear mcast tcam table * set_mc_addr() * set multicast mode + * add_uc_addr() + * add ucast address + * rm_uc_addr() + * remove ucast address * set_mtu() * set mtu * update_stats() @@ -488,6 +494,11 @@ struct hnae_ae_ops { void (*set_promisc_mode)(struct hnae_handle *handle, u32 en); int (*get_mac_addr)(struct hnae_handle *handle, void **p); int (*set_mac_addr)(struct hnae_handle *handle, void *p); + int (*add_uc_addr)(struct hnae_handle *handle, + const unsigned char *addr); + int (*rm_uc_addr)(struct hnae_handle *handle, + const unsigned char *addr); + int (*clr_mc_addr)(struct hnae_handle *handle); int (*set_mc_addr)(struct hnae_handle *handle, void *addr); int (*set_mtu)(struct hnae_handle *handle, int new_mtu); void (*set_tso_stats)(struct hnae_handle *handle, int enable); @@ -590,7 +601,7 @@ static inline int hnae_alloc_buffer_attach(struct hnae_ring *ring, int i) if (ret) return ret; - ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); return 0; } @@ -621,14 +632,14 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, bops->unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } static inline void hnae_reuse_buffer(struct hnae_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; - ring->desc[i].addr = (__le64)(ring->desc_cb[i].dma + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 2d0cb609adc3..0a9cdf00b31a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -18,9 +18,6 @@ #include "hns_dsaf_rcb.h" #define AE_NAME_PORT_ID_IDX 6 -#define ETH_STATIC_REG 1 -#define ETH_DUMP_REG 5 -#define ETH_GSTRING_LEN 32 static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle) { @@ -202,6 +199,28 @@ static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p) return 0; } +static int hns_ae_add_uc_address(struct hnae_handle *handle, + const unsigned char *addr) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return -ENOSPC; + + return hns_mac_add_uc_addr(mac_cb, handle->vf_id, addr); +} + +static int hns_ae_rm_uc_address(struct hnae_handle *handle, + const unsigned char *addr) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return -ENOSPC; + + return hns_mac_rm_uc_addr(mac_cb, handle->vf_id, addr); +} + static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) { int ret; @@ -235,6 +254,16 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) return ret; } +static int hns_ae_clr_multicast(struct hnae_handle *handle) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return 0; + + return hns_mac_clr_multicast(mac_cb, handle->vf_id); +} + static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu) { struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); @@ -823,7 +852,10 @@ static struct hnae_ae_ops hns_dsaf_ops = { .get_coalesce_range = hns_ae_get_coalesce_range, .set_promisc_mode = hns_ae_set_promisc_mode, .set_mac_addr = hns_ae_set_mac_address, + .add_uc_addr = hns_ae_add_uc_address, + .rm_uc_addr = hns_ae_rm_uc_address, .set_mc_addr = hns_ae_set_multicast_one, + .clr_mc_addr = hns_ae_clr_multicast, .set_mtu = hns_ae_set_mtu, .update_stats = hns_ae_update_stats, .set_tso_stats = hns_ae_set_tso_stats, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 1e1eb92998fb..3382441fe7b5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -37,8 +37,8 @@ static const struct mac_stats_string g_gmac_stats_string[] = { {"gmac_rx_very_long_err", MAC_STATS_FIELD_OFF(rx_long_err)}, {"gmac_rx_runt_err", MAC_STATS_FIELD_OFF(rx_minto64)}, {"gmac_rx_short_err", MAC_STATS_FIELD_OFF(rx_under_min)}, - {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_bytes)}, - {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_pkts)}, + {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_pkts)}, + {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_bytes)}, {"gmac_rx_overrun_cnt", MAC_STATS_FIELD_OFF(rx_fifo_overrun_err)}, {"gmac_rx_length_err", MAC_STATS_FIELD_OFF(rx_len_err)}, {"gmac_rx_fail_comma", MAC_STATS_FIELD_OFF(rx_comma_err)}, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index ec8c738af726..3239d27143b9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -263,6 +263,46 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, return 0; } +int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + struct dsaf_drv_mac_single_dest_entry mac_entry; + int ret; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return -ENOSPC; + + memset(&mac_entry, 0, sizeof(mac_entry)); + memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); + mac_entry.in_port_num = mac_cb->mac_id; + ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num); + if (ret) + return ret; + + return hns_dsaf_set_mac_uc_entry(dsaf_dev, &mac_entry); +} + +int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + struct dsaf_drv_mac_single_dest_entry mac_entry; + int ret; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return -ENOSPC; + + memset(&mac_entry, 0, sizeof(mac_entry)); + memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); + mac_entry.in_port_num = mac_cb->mac_id; + ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num); + if (ret) + return ret; + + return hns_dsaf_rm_mac_addr(dsaf_dev, &mac_entry); +} + int hns_mac_set_multi(struct hns_mac_cb *mac_cb, u32 port_num, char *addr, bool enable) { @@ -330,13 +370,24 @@ int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac) return 0; } +int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + u8 port_num; + int ret = hns_mac_get_inner_port_num(mac_cb, vfn, &port_num); + + if (ret) + return ret; + + return hns_dsaf_clr_mac_mc_port(dsaf_dev, mac_cb->mac_id, port_num); +} + static void hns_mac_param_get(struct mac_params *param, struct hns_mac_cb *mac_cb) { param->vaddr = (void *)mac_cb->vaddr; param->mac_mode = hns_get_enet_interface(mac_cb); - memcpy(param->addr, mac_cb->addr_entry_idx[0].addr, - MAC_NUM_OCTETS_PER_ADDR); + ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr); param->mac_id = mac_cb->mac_id; param->dev = mac_cb->dev; } @@ -353,8 +404,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; - u8 addr[MAC_NUM_OCTETS_PER_ADDR] - = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct dsaf_drv_mac_single_dest_entry mac_entry; /* directy return ok in debug network mode */ @@ -389,8 +439,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; u8 port_num; - u8 addr[MAC_NUM_OCTETS_PER_ADDR] - = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mac_entry_idx *uc_mac_entry; struct dsaf_drv_mac_single_dest_entry mac_entry; @@ -453,8 +502,7 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu) if (mac_cb->mac_type == HNAE_PORT_DEBUG) max_frm = MAC_MAX_MTU_DBG; - if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) || - (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)) + if (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size) return -EINVAL; if (!drv->config_max_frame_length) @@ -869,6 +917,13 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) } } + if (fwnode_property_read_u8_array(mac_cb->fw_port, "mc-mac-mask", + mac_cb->mc_mask, ETH_ALEN)) { + dev_warn(mac_cb->dev, + "no mc-mac-mask property, set to default value.\n"); + eth_broadcast_addr(mac_cb->mc_mask); + } + return 0; } @@ -1082,6 +1137,8 @@ void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en) { struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + hns_dsaf_set_promisc_tcam(mac_cb->dsaf_dev, mac_cb->mac_id, !!en); + if (mac_ctrl_drv->set_promiscuous) mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index d3a1f72ece0e..2bb3d1e93c64 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -31,7 +31,7 @@ struct dsaf_device; #define MAC_MIN_MTU 68 #define MAC_MAX_MTU_DBG MAC_DEFAULT_MTU -#define MAC_DEFAULT_PAUSE_TIME 0xff +#define MAC_DEFAULT_PAUSE_TIME 0xffff #define MAC_GMAC_IDX 0 #define MAC_XGMAC_IDX 1 @@ -56,9 +56,6 @@ struct dsaf_device; /*check mac addr multicast*/ #define MAC_IS_MULTICAST(p) ((*((u8 *)((p) + 0)) & 0x01) ? (1) : (0)) -/**< Number of octets (8-bit bytes) in an ethernet address */ -#define MAC_NUM_OCTETS_PER_ADDR 6 - struct mac_priv { void *mac; }; @@ -189,7 +186,7 @@ struct mac_statistics { /*mac para struct ,mac get param from nic or dsaf when initialize*/ struct mac_params { - char addr[MAC_NUM_OCTETS_PER_ADDR]; + char addr[ETH_ALEN]; void *vaddr; /*virtual address*/ struct device *dev; u8 mac_id; @@ -214,7 +211,7 @@ struct mac_info { }; struct mac_entry_idx { - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 vlan_id:12; u16 valid:1; u16 qos:3; @@ -317,6 +314,7 @@ struct hns_mac_cb { u8 __iomem *serdes_vaddr; struct regmap *serdes_ctrl; struct regmap *cpld_ctrl; + char mc_mask[ETH_ALEN]; u32 cpld_ctrl_reg; u32 port_rst_off; u32 port_mode_off; @@ -409,7 +407,7 @@ struct mac_driver { }; struct mac_stats_string { - char desc[64]; + char desc[ETH_GSTRING_LEN]; unsigned long offset; }; @@ -463,5 +461,10 @@ int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb, void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en); int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num); +int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr); +int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr); +int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn); #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 8ea3d95fa483..90dbda792614 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -591,6 +591,16 @@ static void hns_dsaf_voq_bp_all_thrd_cfg(struct dsaf_device *dsaf_dev) } } +static void hns_dsaf_tbl_tcam_match_cfg( + struct dsaf_device *dsaf_dev, + struct dsaf_tbl_tcam_data *ptbl_tcam_data) +{ + dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_L_REG, + ptbl_tcam_data->tbl_tcam_data_low); + dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_H_REG, + ptbl_tcam_data->tbl_tcam_data_high); +} + /** * hns_dsaf_tbl_tcam_data_cfg - tbl * @dsaf_id: dsa fabric id @@ -755,7 +765,7 @@ static void hns_dsaf_tbl_tcam_data_ucast_pul( void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en) { - if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) + if (AE_IS_VER1(dsaf_dev->dsaf_ver) && !HNS_DSAF_IS_DEBUG(dsaf_dev)) dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en); } @@ -894,15 +904,16 @@ static void hns_dsaf_tcam_uc_cfg( } /** - * hns_dsaf_tcam_mc_cfg - INT - * @dsaf_id: dsa fabric id - * @address, - * @ptbl_tcam_data, - * @ptbl_tcam_mcast, + * hns_dsaf_tcam_mc_cfg - cfg the tcam for mc + * @dsaf_dev: dsa fabric device struct pointer + * @address: tcam index + * @ptbl_tcam_data: tcam data struct pointer + * @ptbl_tcam_mcast: tcam mask struct pointer, it must be null for HNSv1 */ static void hns_dsaf_tcam_mc_cfg( struct dsaf_device *dsaf_dev, u32 address, struct dsaf_tbl_tcam_data *ptbl_tcam_data, + struct dsaf_tbl_tcam_data *ptbl_tcam_mask, struct dsaf_tbl_tcam_mcast_cfg *ptbl_tcam_mcast) { spin_lock_bh(&dsaf_dev->tcam_lock); @@ -913,7 +924,11 @@ static void hns_dsaf_tcam_mc_cfg( hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, ptbl_tcam_data); /*Write Tcam Mcast*/ hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, ptbl_tcam_mcast); - /*Write Plus*/ + /* Write Match Data */ + if (ptbl_tcam_mask) + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, ptbl_tcam_mask); + + /* Write Puls */ hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev); spin_unlock_bh(&dsaf_dev->tcam_lock); @@ -944,6 +959,16 @@ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address) spin_unlock_bh(&dsaf_dev->tcam_lock); } +void hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr) +{ + addr[0] = mac_key->high.bits.mac_0; + addr[1] = mac_key->high.bits.mac_1; + addr[2] = mac_key->high.bits.mac_2; + addr[3] = mac_key->high.bits.mac_3; + addr[4] = mac_key->low.bits.mac_4; + addr[5] = mac_key->low.bits.mac_5; +} + /** * hns_dsaf_tcam_uc_get - INT * @dsaf_id: dsa fabric id @@ -1369,6 +1394,12 @@ static int hns_dsaf_init(struct dsaf_device *dsaf_dev) if (HNS_DSAF_IS_DEBUG(dsaf_dev)) return 0; + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + dsaf_dev->tcam_max_num = DSAF_TCAM_SUM; + else + dsaf_dev->tcam_max_num = + DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; + spin_lock_init(&dsaf_dev->tcam_lock); ret = hns_dsaf_init_hw(dsaf_dev); if (ret) @@ -1424,7 +1455,7 @@ static u16 hns_dsaf_find_soft_mac_entry( u32 i; soft_mac_entry = priv->soft_mac_tbl; - for (i = 0; i < DSAF_TCAM_SUM; i++) { + for (i = 0; i < dsaf_dev->tcam_max_num; i++) { /* invall tab entry */ if ((soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX) && (soft_mac_entry->tcam_key.high.val == mac_key->high.val) && @@ -1449,7 +1480,7 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev) u32 i; soft_mac_entry = priv->soft_mac_tbl; - for (i = 0; i < DSAF_TCAM_SUM; i++) { + for (i = 0; i < dsaf_dev->tcam_max_num; i++) { /* inv all entry */ if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX) /* return find result --soft index */ @@ -1488,8 +1519,12 @@ static void hns_dsaf_set_mac_key( mac_key->high.bits.mac_3 = addr[3]; mac_key->low.bits.mac_4 = addr[4]; mac_key->low.bits.mac_5 = addr[5]; - mac_key->low.bits.vlan = vlan_id; - mac_key->low.bits.port = port; + dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M, + DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); + dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, port); + + mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan); } /** @@ -1507,6 +1542,7 @@ int hns_dsaf_set_mac_uc_entry( struct dsaf_drv_priv *priv = (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; + struct dsaf_tbl_tcam_data tcam_data; /* mac addr check */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1548,9 +1584,10 @@ int hns_dsaf_set_mac_uc_entry( /* default config dvc to 0 */ mac_data.tbl_ucast_dvc = 0; mac_data.tbl_ucast_out_port = mac_entry->port_num; - hns_dsaf_tcam_uc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data); /* config software entry */ soft_mac_entry += entry_index; @@ -1561,6 +1598,55 @@ int hns_dsaf_set_mac_uc_entry( return 0; } +int hns_dsaf_rm_mac_addr( + struct dsaf_device *dsaf_dev, + struct dsaf_drv_mac_single_dest_entry *mac_entry) +{ + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_tbl_tcam_ucast_cfg mac_data; + struct dsaf_drv_tbl_tcam_key mac_key; + + /* mac addr check */ + if (!is_valid_ether_addr(mac_entry->addr)) { + dev_err(dsaf_dev->dev, "rm_uc_addr %s Mac %pM err!\n", + dsaf_dev->ae_dev.name, mac_entry->addr); + return -EINVAL; + } + + /* config key */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id, + mac_entry->in_port_num, mac_entry->addr); + + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + /* can not find the tcam entry, return 0 */ + dev_info(dsaf_dev->dev, + "rm_uc_addr no tcam, %s Mac key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, + mac_key.high.val, mac_key.low.val); + return 0; + } + + dev_dbg(dsaf_dev->dev, + "rm_uc_addr, %s Mac key(%#x:%#x) entry_index%d\n", + dsaf_dev->ae_dev.name, mac_key.high.val, + mac_key.low.val, entry_index); + + hns_dsaf_tcam_uc_get( + dsaf_dev, entry_index, + (struct dsaf_tbl_tcam_data *)&mac_key, + &mac_data); + + /* unicast entry not used locally should not clear */ + if (mac_entry->port_num != mac_data.tbl_ucast_out_port) + return -EFAULT; + + return hns_dsaf_del_mac_entry(dsaf_dev, + mac_entry->in_vlan_id, + mac_entry->in_port_num, + mac_entry->addr); +} + /** * hns_dsaf_set_mac_mc_entry - set mac mc-entry * @dsaf_dev: dsa fabric device struct pointer @@ -1577,6 +1663,7 @@ int hns_dsaf_set_mac_mc_entry( (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; /* mac addr check */ if (MAC_IS_ALL_ZEROS(mac_entry->addr)) { @@ -1609,9 +1696,12 @@ int hns_dsaf_set_mac_mc_entry( 0, sizeof(mac_data.tbl_mcast_port_msk)); } else { /* config hardware entry */ - hns_dsaf_tcam_mc_get( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, + &mac_data); + + tmp_mac_key.high.val = + le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); } mac_data.tbl_mcast_old_en = 0; mac_data.tbl_mcast_item_vld = 1; @@ -1623,9 +1713,11 @@ int hns_dsaf_set_mac_mc_entry( dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL, + &mac_data); /* config software entry */ soft_mac_entry += entry_index; @@ -1636,6 +1728,16 @@ int hns_dsaf_set_mac_mc_entry( return 0; } +static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src) +{ + u16 *a = (u16 *)dst; + const u16 *b = (const u16 *)src; + + a[0] &= b[0]; + a[1] &= b[1]; + a[2] &= b[2]; +} + /** * hns_dsaf_add_mac_mc_port - add mac mc-port * @dsaf_dev: dsa fabric device struct pointer @@ -1646,11 +1748,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, { u16 entry_index = DSAF_INVALID_ENTRY_IDX; struct dsaf_drv_tbl_tcam_key mac_key; + struct dsaf_drv_tbl_tcam_key mask_key; + struct dsaf_tbl_tcam_data *pmask_key = NULL; struct dsaf_tbl_tcam_mcast_cfg mac_data; - struct dsaf_drv_priv *priv = - (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; + u8 mc_addr[ETH_ALEN]; + u8 *mc_mask; int mskid; /*chechk mac addr */ @@ -1660,14 +1766,32 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, return -EINVAL; } + ether_addr_copy(mc_addr, mac_entry->addr); + mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask; + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + /* prepare for key data setting */ + hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask); + + /* config key mask */ + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, + 0x0, + 0xff, + mc_mask); + + mask_key.high.val = le32_to_cpu(mask_key.high.val); + mask_key.low.val = le32_to_cpu(mask_key.low.val); + + pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); + } + /*config key */ hns_dsaf_set_mac_key( dsaf_dev, &mac_key, mac_entry->in_vlan_id, - mac_entry->in_port_num, mac_entry->addr); + mac_entry->in_port_num, mc_addr); memset(&mac_data, 0, sizeof(struct dsaf_tbl_tcam_mcast_cfg)); - /*check exist? */ + /* check if the tcam is exist */ entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); if (entry_index == DSAF_INVALID_ENTRY_IDX) { /*if hasnot , find a empty*/ @@ -1681,11 +1805,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, return -EINVAL; } } else { - /*if exist, add in */ - hns_dsaf_tcam_mc_get( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data); + /* if exist, add in */ + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, + &mac_data); + + tmp_mac_key.high.val = + le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); } + /* config hardware entry */ if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) { mskid = mac_entry->port_num; @@ -1708,9 +1836,12 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + /* config mc entry with mask */ + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, + pmask_key, &mac_data); /*config software entry */ soft_mac_entry += entry_index; @@ -1782,15 +1913,18 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, { u16 entry_index = DSAF_INVALID_ENTRY_IDX; struct dsaf_drv_tbl_tcam_key mac_key; - struct dsaf_drv_priv *priv = - (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; u16 vlan_id; u8 in_port_num; struct dsaf_tbl_tcam_mcast_cfg mac_data; - struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; int mskid; const u8 empty_msk[sizeof(mac_data.tbl_mcast_port_msk)] = {0}; + struct dsaf_drv_tbl_tcam_key mask_key, tmp_mac_key; + struct dsaf_tbl_tcam_data *pmask_key = NULL; + u8 mc_addr[ETH_ALEN]; + u8 *mc_mask; if (!(void *)mac_entry) { dev_err(dsaf_dev->dev, @@ -1798,10 +1932,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, return -EINVAL; } - /*get key info*/ - vlan_id = mac_entry->in_vlan_id; - in_port_num = mac_entry->in_port_num; - /*check mac addr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr)) { dev_err(dsaf_dev->dev, "del_port failed, addr %pM!\n", @@ -1809,11 +1939,31 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, return -EINVAL; } - /*config key */ - hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, - mac_entry->addr); + /* always mask vlan_id field */ + ether_addr_copy(mc_addr, mac_entry->addr); + mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask; + + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + /* prepare for key data setting */ + hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask); + + /* config key mask */ + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_addr); + + mask_key.high.val = le32_to_cpu(mask_key.high.val); + mask_key.low.val = le32_to_cpu(mask_key.low.val); - /*check is exist? */ + pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); + } + + /* get key info */ + vlan_id = mac_entry->in_vlan_id; + in_port_num = mac_entry->in_port_num; + + /* config key */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, mc_addr); + + /* check if the tcam entry is exist */ entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); if (entry_index == DSAF_INVALID_ENTRY_IDX) { /*find none */ @@ -1829,10 +1979,11 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - /*read entry*/ - hns_dsaf_tcam_mc_get( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data); + /* read entry */ + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + tmp_mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); /*del the port*/ if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) { @@ -1857,15 +2008,87 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, /* del soft entry */ soft_mac_entry += entry_index; soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; - } else { /* not zer, just del port, updata*/ - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + } else { /* not zero, just del port, update */ + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, + &tcam_data, + pmask_key, &mac_data); } return 0; } +int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, + u8 port_num) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + struct dsaf_tbl_tcam_mcast_cfg mac_data; + int ret = 0, i; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return 0; + + for (i = 0; i < DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; i++) { + u8 addr[ETH_ALEN]; + u8 port; + + soft_mac_entry = priv->soft_mac_tbl + i; + + hns_dsaf_tcam_addr_get(&soft_mac_entry->tcam_key, addr); + port = dsaf_get_field( + soft_mac_entry->tcam_key.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S); + /* check valid tcam mc entry */ + if (soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX && + port == mac_id && + is_multicast_ether_addr(addr) && + !is_broadcast_ether_addr(addr)) { + const u32 empty_msk[DSAF_PORT_MSK_NUM] = {0}; + struct dsaf_drv_mac_single_dest_entry mac_entry; + + /* disable receiving of this multicast address for + * the VF. + */ + ether_addr_copy(mac_entry.addr, addr); + mac_entry.in_vlan_id = dsaf_get_field( + soft_mac_entry->tcam_key.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_VLAN_M, + DSAF_TBL_TCAM_KEY_VLAN_S); + mac_entry.in_port_num = mac_id; + mac_entry.port_num = port_num; + if (hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry)) { + ret = -EINVAL; + continue; + } + + /* disable receiving of this multicast address for + * the mac port if all VF are disable + */ + hns_dsaf_tcam_mc_get(dsaf_dev, i, + (struct dsaf_tbl_tcam_data *) + (&soft_mac_entry->tcam_key), + &mac_data); + dsaf_set_bit(mac_data.tbl_mcast_port_msk[mac_id / 32], + mac_id % 32, 0); + if (!memcmp(mac_data.tbl_mcast_port_msk, empty_msk, + sizeof(u32) * DSAF_PORT_MSK_NUM)) { + mac_entry.port_num = mac_id; + if (hns_dsaf_del_mac_mc_port(dsaf_dev, + &mac_entry)) { + ret = -EINVAL; + continue; + } + } + } + } + + return ret; +} + /** * hns_dsaf_get_mac_uc_entry - get mac uc entry * @dsaf_dev: dsa fabric device struct pointer @@ -1878,6 +2101,7 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev, struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_tbl_tcam_ucast_cfg mac_data; + struct dsaf_tbl_tcam_data tcam_data; /* check macaddr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1906,9 +2130,12 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - /*read entry*/ - hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + /* read entry */ + hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); + mac_entry->port_num = mac_data.tbl_ucast_out_port; return 0; @@ -1926,6 +2153,7 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev, struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_tbl_tcam_mcast_cfg mac_data; + struct dsaf_tbl_tcam_data tcam_data; /*check mac addr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1955,8 +2183,10 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev, mac_key.low.val, entry_index); /*read entry */ - hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F; return 0; @@ -1976,9 +2206,10 @@ int hns_dsaf_get_mac_entry_by_index( struct dsaf_tbl_tcam_mcast_cfg mac_data; struct dsaf_tbl_tcam_ucast_cfg mac_uc_data; - char mac_addr[MAC_NUM_OCTETS_PER_ADDR] = {0}; + struct dsaf_tbl_tcam_data tcam_data; + char mac_addr[ETH_ALEN] = {0}; - if (entry_index >= DSAF_TCAM_SUM) { + if (entry_index >= dsaf_dev->tcam_max_num) { /* find none, del error */ dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n", dsaf_dev->ae_dev.name); @@ -1986,8 +2217,10 @@ int hns_dsaf_get_mac_entry_by_index( } /* mc entry, do read opt */ - hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F; @@ -2004,9 +2237,12 @@ int hns_dsaf_get_mac_entry_by_index( /**mc donot do*/ } else { /*is not mc, just uc... */ - hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, + hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_uc_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); + mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port); } @@ -2670,6 +2906,59 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +/* Reserve the last TCAM entry for promisc support */ +#define dsaf_promisc_tcam_entry(port) \ + (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port)) +void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, + u32 port, bool enable) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; + u16 entry_index; + struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask; + struct dsaf_tbl_tcam_mcast_cfg mac_data = {0}; + + if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev)) + return; + + /* find the tcam entry index for promisc */ + entry_index = dsaf_promisc_tcam_entry(port); + + /* config key mask */ + if (enable) { + memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); + memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); + dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, port); + dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, 0xf); + + /* SUB_QID */ + dsaf_set_bit(mac_data.tbl_mcast_port_msk[0], + DSAF_SERVICE_NW_NUM, true); + mac_data.tbl_mcast_item_vld = true; /* item_vld bit */ + } else { + mac_data.tbl_mcast_item_vld = false; /* item_vld bit */ + } + + dev_dbg(dsaf_dev->dev, + "set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n", + dsaf_dev->ae_dev.name, tbl_tcam_data.high.val, + tbl_tcam_data.low.val, entry_index); + + /* config promisc entry with mask */ + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, + (struct dsaf_tbl_tcam_data *)&tbl_tcam_data, + (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask, + &mac_data); + + /* config software entry */ + soft_mac_entry += entry_index; + soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX; +} + /** * dsaf_probe - probo dsaf dev * @pdev: dasf platform device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index c494fc52be74..cef6bf46ae93 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -35,8 +35,6 @@ struct hns_mac_cb; #define DSAF_CFG_READ_CNT 30 -#define MAC_NUM_OCTETS_PER_ADDR 6 - #define DSAF_DUMP_REGS_NUM 504 #define DSAF_STATIC_NUM 28 #define DSAF_V2_STATIC_NUM 44 @@ -165,7 +163,7 @@ enum dsaf_mode { /*mac entry, mc or uc entry*/ struct dsaf_drv_mac_single_dest_entry { /* mac addr, match the entry*/ - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 in_vlan_id; /* value of VlanId */ /* the vld input port num, dsaf-mode fix 0, */ @@ -179,7 +177,7 @@ struct dsaf_drv_mac_single_dest_entry { /*only mc entry*/ struct dsaf_drv_mac_multi_dest_entry { /* mac addr, match the entry*/ - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 in_vlan_id; /* this mac addr output port,*/ /* bit0-bit5 means Port0-Port5(1bit is vld)**/ @@ -308,8 +306,6 @@ struct dsaf_misc_op { /* reset series function, it will be reset if the dereset is 0 */ void (*dsaf_reset)(struct dsaf_device *dsaf_dev, bool dereset); void (*xge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); - void (*xge_core_srst)(struct dsaf_device *dsaf_dev, u32 port, - bool dereset); void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset); @@ -343,6 +339,7 @@ struct dsaf_device { enum hal_dsaf_mode dsaf_en; enum hal_dsaf_tc_mode dsaf_tc_mode; u32 dsaf_ver; + u16 tcam_max_num; /* max TCAM entry for user except promisc */ struct ppe_common_cb *ppe_common[DSAF_COMM_DEV_NUM]; struct rcb_common_cb *rcb_common[DSAF_COMM_DEV_NUM]; @@ -360,6 +357,11 @@ static inline void *hns_dsaf_dev_priv(const struct dsaf_device *dsaf_dev) return (void *)((u8 *)dsaf_dev + sizeof(*dsaf_dev)); } +#define DSAF_TBL_TCAM_KEY_PORT_S 0 +#define DSAF_TBL_TCAM_KEY_PORT_M (((1ULL << 4) - 1) << 0) +#define DSAF_TBL_TCAM_KEY_VLAN_S 4 +#define DSAF_TBL_TCAM_KEY_VLAN_M (((1ULL << 12) - 1) << 4) + struct dsaf_drv_tbl_tcam_key { union { struct { @@ -373,11 +375,9 @@ struct dsaf_drv_tbl_tcam_key { } high; union { struct { - u32 port:4; /* port id, */ - /* dsaf-mode fixed 0, non-dsaf-mode port id*/ - u32 vlan:12; /* vlan id */ - u32 mac_5:8; - u32 mac_4:8; + u16 port_vlan; + u8 mac_5; + u8 mac_4; } bits; u32 val; @@ -461,10 +461,19 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port, void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data); int hns_dsaf_get_regs_count(void); void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en); +void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, + u32 port, bool enable); void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id, u32 *en); int hns_dsaf_set_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id, u32 en); +int hns_dsaf_rm_mac_addr( + struct dsaf_device *dsaf_dev, + struct dsaf_drv_mac_single_dest_entry *mac_entry); + +int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, + u8 mac_id, u8 port_num); + #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 67accce1d33d..a2c22d084ce9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -23,7 +23,6 @@ enum _dsm_op_index { enum _dsm_rst_type { HNS_DSAF_RESET_FUNC = 0x1, HNS_PPE_RESET_FUNC = 0x2, - HNS_XGE_CORE_RESET_FUNC = 0x3, HNS_XGE_RESET_FUNC = 0x4, HNS_GE_RESET_FUNC = 0x5, HNS_DSAF_CHN_RESET_FUNC = 0x6, @@ -213,26 +212,6 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, HNS_XGE_RESET_FUNC, port, dereset); } -static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, - u32 port, bool dereset) -{ - u32 reg_val = 0; - u32 reg_addr; - - if (port >= DSAF_XGE_NUM) - return; - - reg_val |= XGMAC_TRX_CORE_SRST_M - << dsaf_dev->mac_cb[port]->port_rst_off; - - if (!dereset) - reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG; - else - reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG; - - dsaf_write_sub(dsaf_dev, reg_addr, reg_val); -} - /** * hns_dsaf_srst_chns - reset dsaf channels * @dsaf_dev: dsaf device struct pointer @@ -293,14 +272,6 @@ void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset) HNS_ROCE_RESET_FUNC, 0, dereset); } -static void -hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev, - u32 port, bool dereset) -{ - hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, - HNS_XGE_CORE_RESET_FUNC, port, dereset); -} - static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, bool dereset) { @@ -597,7 +568,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->dsaf_reset = hns_dsaf_rst; misc_op->xge_srst = hns_dsaf_xge_srst_by_port; - misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port; misc_op->ge_srst = hns_dsaf_ge_srst_by_port; misc_op->ppe_srst = hns_ppe_srst_by_port; misc_op->ppe_comm_srst = hns_ppe_com_srst; @@ -615,7 +585,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->dsaf_reset = hns_dsaf_rst_acpi; misc_op->xge_srst = hns_dsaf_xge_srst_by_port_acpi; - misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port_acpi; misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi; misc_op->ppe_srst = hns_ppe_srst_by_port_acpi; misc_op->ppe_comm_srst = hns_ppe_com_srst; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 878950a42e6c..87226685f742 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -41,6 +41,9 @@ #define DSAF_SW_PORT_NUM 8 #define DSAF_TOTAL_QUEUE_NUM 129 +/* reserved a tcam entry for each port to support promisc by fuzzy match */ +#define DSAFV2_MAC_FUZZY_TCAM_NUM DSAF_MAX_PORT_NUM + #define DSAF_TCAM_SUM 512 #define DSAF_LINE_SUM (2048 * 14) @@ -297,6 +300,8 @@ #define DSAF_TBL_LKUP_NUM_I_0_REG 0x50C0 #define DSAF_TBL_LKUP_NUM_O_0_REG 0x50E0 #define DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG 0x510C +#define DSAF_TBL_TCAM_MATCH_CFG_H_REG 0x5130 +#define DSAF_TBL_TCAM_MATCH_CFG_L_REG 0x5134 #define DSAF_INODE_FIFO_WL_0_REG 0x6000 #define DSAF_ONODE_FIFO_WL_0_REG 0x6020 @@ -309,7 +314,6 @@ #define PPE_COM_INTEN_REG 0x110 #define PPE_COM_RINT_REG 0x114 #define PPE_COM_INTSTS_REG 0x118 -#define PPE_COM_COMMON_CNT_CLR_CE_REG 0x1120 #define PPE_COM_HIS_RX_PKT_QID_DROP_CNT_REG 0x300 #define PPE_COM_HIS_RX_PKT_QID_OK_CNT_REG 0x600 #define PPE_COM_HIS_TX_PKT_QID_ERR_CNT_REG 0x900 @@ -698,8 +702,6 @@ #define XGMAC_RX_SYMBOLERRPKTS 0x0210 #define XGMAC_RX_FCSERRPKTS 0x0218 -#define XGMAC_TRX_CORE_SRST_M 0x2080 - #define DSAF_SRAM_INIT_OVER_M 0xff #define DSAFV2_SRAM_INIT_OVER_M 0x3ff #define DSAF_SRAM_INIT_OVER_S 0 @@ -978,6 +980,11 @@ #define XGMAC_ENABLE_TX_B 0 #define XGMAC_ENABLE_RX_B 1 +#define XGMAC_UNIDIR_EN_B 0 +#define XGMAC_RF_TX_EN_B 1 +#define XGMAC_LF_RF_INSERT_S 2 +#define XGMAC_LF_RF_INSERT_M (0x3 << XGMAC_LF_RF_INSERT_S) + #define XGMAC_CTL_TX_FCS_B 0 #define XGMAC_CTL_TX_PAD_B 1 #define XGMAC_CTL_TX_PREAMBLE_TRANS_B 3 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index 8f4f0e8da984..aae830a93050 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -108,6 +108,31 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value) } /** + * hns_xgmac_tx_lf_rf_insert - insert lf rf control about xgmac + * @mac_drv: mac driver + * @mode: inserf rf or lf + */ +static void hns_xgmac_lf_rf_insert(struct mac_driver *mac_drv, u32 mode) +{ + dsaf_set_dev_field(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, + XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, mode); +} + +/** + * hns_xgmac__lf_rf_control_init - initial the lf rf control register + * @mac_drv: mac driver + */ +static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) +{ + u32 val = 0; + + dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); + dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); + dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); + dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); +} + +/** *hns_xgmac_enable - enable xgmac port *@drv: mac driver *@mode: mode of mac port @@ -115,12 +140,8 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value) static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - struct dsaf_device *dsaf_dev - = (struct dsaf_device *)dev_get_drvdata(drv->dev); - u32 port = drv->mac_id; - dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 1); - mdelay(10); + hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_NO_LF_RF_INSERT); /*enable XGE rX/tX */ if (mode == MAC_COMM_MODE_TX) { @@ -143,9 +164,6 @@ static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode) static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - struct dsaf_device *dsaf_dev - = (struct dsaf_device *)dev_get_drvdata(drv->dev); - u32 port = drv->mac_id; if (mode == MAC_COMM_MODE_TX) { hns_xgmac_tx_enable(drv, 0); @@ -155,9 +173,7 @@ static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode) hns_xgmac_tx_enable(drv, 0); hns_xgmac_rx_enable(drv, 0); } - - mdelay(10); - dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 0); + hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_LF_INSERT); } /** @@ -203,6 +219,7 @@ static void hns_xgmac_init(void *mac_drv) dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 1); mdelay(100); + hns_xgmac_lf_rf_control_init(drv); hns_xgmac_exc_irq_en(drv, 0); hns_xgmac_pma_fec_enable(drv, 0x0, 0x0); @@ -788,7 +805,7 @@ static int hns_xgmac_get_sset_count(int stringset) */ static int hns_xgmac_get_regs_count(void) { - return ETH_XGMAC_DUMP_NUM; + return HNS_XGMAC_DUMP_NUM; } void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h index 139f7297c7b4..da6c5343d3e1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h @@ -10,6 +10,7 @@ #ifndef _HNS_XGMAC_H #define _HNS_XGMAC_H -#define ETH_XGMAC_DUMP_NUM (214) - +#define HNS_XGMAC_DUMP_NUM 214 +#define HNS_XGMAC_NO_LF_RF_INSERT 0x0 +#define HNS_XGMAC_LF_INSERT 0x2 #endif diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index dff7b60345d8..776d81e785d8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -22,6 +22,7 @@ #include "hnae.h" #include "hns_enet.h" +#include "hns_dsaf_mac.h" #define NIC_MAX_Q_PER_VF 16 #define HNS_NIC_TX_TIMEOUT (5 * HZ) @@ -1426,10 +1427,6 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu) struct hnae_handle *h = priv->ae_handle; int ret; - /* MTU < 68 is an error and causes problems on some kernels */ - if (new_mtu < 68) - return -EINVAL; - if (!h->dev->ops->set_mtu) return -ENOTSUPP; @@ -1496,6 +1493,29 @@ static netdev_features_t hns_nic_fix_features( return features; } +static int hns_nic_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (h->dev->ops->add_uc_addr) + return h->dev->ops->add_uc_addr(h, addr); + + return 0; +} + +static int hns_nic_uc_unsync(struct net_device *netdev, + const unsigned char *addr) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (h->dev->ops->rm_uc_addr) + return h->dev->ops->rm_uc_addr(h, addr); + + return 0; +} + /** * nic_set_multicast_list - set mutl mac address * @netdev: net device @@ -1514,6 +1534,10 @@ void hns_set_multicast_list(struct net_device *ndev) return; } + if (h->dev->ops->clr_mc_addr) + if (h->dev->ops->clr_mc_addr(h)) + netdev_err(ndev, "clear multicast address fail\n"); + if (h->dev->ops->set_mc_addr) { netdev_for_each_mc_addr(ha, ndev) if (h->dev->ops->set_mc_addr(h, ha->addr)) @@ -1534,6 +1558,9 @@ void hns_nic_set_rx_mode(struct net_device *ndev) } hns_set_multicast_list(ndev); + + if (__dev_uc_sync(ndev, hns_nic_uc_sync, hns_nic_uc_unsync)) + netdev_err(ndev, "sync uc address fail\n"); } struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev, @@ -1992,14 +2019,20 @@ static int hns_nic_dev_probe(struct platform_device *pdev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ndev->vlan_features |= NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + /* MTU range: 68 - 9578 (v1) or 9706 (v2) */ + ndev->min_mtu = MAC_MIN_MTU; switch (priv->enet_ver) { case AE_VERSION_2: ndev->features |= NETIF_F_TSO | NETIF_F_TSO6; ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; + ndev->max_mtu = MAC_MAX_MTU_V2 - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); break; default: + ndev->max_mtu = MAC_MAX_MTU - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); break; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 87d5c94b2810..3ac2183dbd21 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1178,7 +1178,8 @@ static int hns_nic_nway_reset(struct net_device *netdev) struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { - if (phy) + /* if autoneg is disabled, don't restart auto-negotiation */ + if (phy && phy->autoneg == AUTONEG_ENABLE) ret = genphy_restart_aneg(phy); } diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c index 631dbc7b4dbb..1a31bee6e728 100644 --- a/drivers/net/ethernet/hp/hp100.c +++ b/drivers/net/ethernet/hp/hp100.c @@ -427,7 +427,6 @@ static const struct net_device_ops hp100_bm_netdev_ops = { .ndo_start_xmit = hp100_start_xmit_bm, .ndo_get_stats = hp100_get_stats, .ndo_set_rx_mode = hp100_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -438,7 +437,6 @@ static const struct net_device_ops hp100_netdev_ops = { .ndo_start_xmit = hp100_start_xmit, .ndo_get_stats = hp100_get_stats, .ndo_set_rx_mode = hp100_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index ce235b776793..945883842533 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1118,7 +1118,6 @@ static const struct net_device_ops i596_netdev_ops = { .ndo_start_xmit = i596_start_xmit, .ndo_set_rx_mode = set_multicast_list, .ndo_tx_timeout = i596_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index 5d353c660068..dc983450354b 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -981,7 +981,6 @@ static const struct net_device_ops ether1_netdev_ops = { .ndo_set_rx_mode = ether1_setmulticastlist, .ndo_tx_timeout = ether1_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index 3dbc53c21baa..e86773325cbe 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1037,7 +1037,6 @@ static const struct net_device_ops i596_netdev_ops = { .ndo_start_xmit = i596_start_xmit, .ndo_set_rx_mode = set_multicast_list, .ndo_tx_timeout = i596_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 21c84cc9c871..8bb15a8c2a40 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -337,7 +337,6 @@ static const struct net_device_ops sun3_82586_netdev_ops = { .ndo_get_stats = sun3_82586_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, }; static int __init sun3_82586_probe1(struct net_device *dev,int ioaddr) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index bd719e25dd76..fa66fa6f8bee 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1981,14 +1981,6 @@ out: ehea_update_bcmc_registrations(); } -static int ehea_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe) { swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC; @@ -2970,7 +2962,6 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = ehea_set_multicast_list, - .ndo_change_mtu = ehea_change_mtu, .ndo_vlan_rx_add_vid = ehea_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ehea_vlan_rx_kill_vid, .ndo_tx_timeout = ehea_tx_watchdog, @@ -3043,6 +3034,10 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, NETIF_F_IP_CSUM; dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; + /* MTU range: 68 - 9022 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = EHEA_MAX_PACKET_SIZE; + INIT_WORK(&port->reset_task, ehea_reset_port); INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 8f139197f1aa..52a69c925965 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -1099,9 +1099,6 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) struct emac_instance *dev = netdev_priv(ndev); int ret = 0; - if (new_mtu < EMAC_MIN_MTU || new_mtu > dev->max_mtu) - return -EINVAL; - DBG(dev, "change_mtu(%d)" NL, new_mtu); if (netif_running(ndev)) { @@ -2564,7 +2561,7 @@ static int emac_init_config(struct emac_instance *dev) if (emac_read_uint_prop(np, "cell-index", &dev->cell_index, 1)) return -ENXIO; if (emac_read_uint_prop(np, "max-frame-size", &dev->max_mtu, 0)) - dev->max_mtu = 1500; + dev->max_mtu = ETH_DATA_LEN; if (emac_read_uint_prop(np, "rx-fifo-size", &dev->rx_fifo_size, 0)) dev->rx_fifo_size = 2048; if (emac_read_uint_prop(np, "tx-fifo-size", &dev->tx_fifo_size, 0)) @@ -2718,7 +2715,6 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, - .ndo_change_mtu = eth_change_mtu, }; static const struct net_device_ops emac_gige_netdev_ops = { @@ -2891,6 +2887,10 @@ static int emac_probe(struct platform_device *ofdev) ndev->netdev_ops = &emac_netdev_ops; ndev->ethtool_ops = &emac_ethtool_ops; + /* MTU range: 46 - 1500 or whatever is in OF */ + ndev->min_mtu = EMAC_MIN_MTU; + ndev->max_mtu = dev->max_mtu; + netif_carrier_off(ndev); err = register_netdev(ndev); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index ebe60719e489..4a81c892fc31 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1349,9 +1349,6 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) int i, rc; int need_restart = 0; - if (new_mtu < IBMVETH_MIN_MTU) - return -EINVAL; - for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) break; @@ -1551,6 +1548,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->hw_features |= NETIF_F_TSO; } + netdev->min_mtu = IBMVETH_MIN_MTU; + netdev->max_mtu = ETH_MAX_MTU; + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); if (firmware_has_feature(FW_FEATURE_CMO)) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4f3281a03e7e..1e486d1312e9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -902,17 +902,6 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - if (new_mtu > adapter->req_mtu || new_mtu < adapter->min_mtu) - return -EINVAL; - - netdev->mtu = new_mtu; - return 0; -} - static void ibmvnic_tx_timeout(struct net_device *dev) { struct ibmvnic_adapter *adapter = netdev_priv(dev); @@ -1029,7 +1018,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = { .ndo_set_rx_mode = ibmvnic_set_multi, .ndo_set_mac_address = ibmvnic_set_mac, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ibmvnic_change_mtu, .ndo_tx_timeout = ibmvnic_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmvnic_netpoll_controller, @@ -2639,10 +2627,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, break; case MIN_MTU: adapter->min_mtu = be64_to_cpu(crq->query_capability.number); + netdev->min_mtu = adapter->min_mtu; netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); break; case MAX_MTU: adapter->max_mtu = be64_to_cpu(crq->query_capability.number); + netdev->max_mtu = adapter->max_mtu; netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); break; case MAX_MULTICAST_FILTERS: @@ -3668,6 +3658,8 @@ static void handle_crq_init_rsp(struct work_struct *work) netdev->real_num_tx_queues = adapter->req_tx_queues; netdev->mtu = adapter->req_mtu; + netdev->min_mtu = adapter->min_mtu; + netdev->max_mtu = adapter->max_mtu; if (adapter->failover) { adapter->failover = false; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 068789e694c9..25c6dfd500b4 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2286,14 +2286,6 @@ static int e100_set_mac_address(struct net_device *netdev, void *p) return 0; } -static int e100_change_mtu(struct net_device *netdev, int new_mtu) -{ - if (new_mtu < ETH_ZLEN || new_mtu > ETH_DATA_LEN) - return -EINVAL; - netdev->mtu = new_mtu; - return 0; -} - static int e100_asf(struct nic *nic) { /* ASF can be enabled from eeprom */ @@ -2834,7 +2826,6 @@ static const struct net_device_ops e100_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, - .ndo_change_mtu = e100_change_mtu, .ndo_do_ioctl = e100_do_ioctl, .ndo_tx_timeout = e100_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index f42129d09e2c..33076fa98002 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1085,6 +1085,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_vendor_id != PCI_VENDOR_ID_VMWARE) netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 46 - 16110 */ + netdev->min_mtu = ETH_ZLEN - ETH_HLEN; + netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); /* initialize eeprom parameters */ @@ -3549,13 +3553,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; - - if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || - (max_frame > MAX_JUMBO_FRAME_SIZE)) { - e_err(probe, "Invalid MTU setting\n"); - return -EINVAL; - } + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; /* Adapter-specific max frame size limits. */ switch (hw->mac_type) { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7017281ba2dc..8759d9236930 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5974,19 +5974,12 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) && + if ((new_mtu > ETH_DATA_LEN) && !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { e_err("Jumbo Frames not supported.\n"); return -EINVAL; } - /* Supported frame sizes */ - if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) || - (max_frame > adapter->max_hw_frame_size)) { - e_err("Unsupported MTU setting\n"); - return -EINVAL; - } - /* Jumbo frame workaround on 82579 and newer requires CRC be stripped */ if ((adapter->hw.mac.type >= e1000_pch2lan) && !(adapter->flags2 & FLAG2_CRC_STRIPPING) && @@ -7187,6 +7180,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->vlan_features |= NETIF_F_HIGHDMA; } + /* MTU range: 68 - max_hw_frame_size */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = adapter->max_hw_frame_size - + (VLAN_ETH_HLEN + ETH_FCS_LEN); + if (e1000e_enable_mng_pass_thru(&adapter->hw)) adapter->flags |= FLAG_MNG_PT_ENABLED; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 05629381be6b..bc5ef6eb3dd6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -706,16 +706,6 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) return err; } -static int fm10k_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > FM10K_MAX_JUMBO_FRAME_SIZE) - return -EINVAL; - - dev->mtu = new_mtu; - - return 0; -} - /** * fm10k_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure @@ -1405,7 +1395,6 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_start_xmit = fm10k_xmit_frame, .ndo_set_mac_address = fm10k_set_mac, - .ndo_change_mtu = fm10k_change_mtu, .ndo_tx_timeout = fm10k_tx_timeout, .ndo_vlan_rx_add_vid = fm10k_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, @@ -1490,5 +1479,9 @@ struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info) dev->hw_features |= hw_features; + /* MTU range: 68 - 15342 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = FM10K_MAX_JUMBO_FRAME_SIZE; + return dev; } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6d61e443bdf8..29c23183a0e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -39,6 +39,7 @@ #include <linux/iommu.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/hashtable.h> #include <linux/string.h> #include <linux/in.h> #include <linux/ip.h> @@ -428,11 +429,13 @@ struct i40e_pf { struct ptp_clock_info ptp_caps; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; - unsigned long last_rx_ptp_check; - spinlock_t tmreg_lock; /* Used to protect the device time registers. */ + struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */ u64 ptp_base_adj; u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + u32 latch_event_flags; + spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */ + unsigned long latch_events[4]; bool ptp_tx; bool ptp_rx; u16 rss_table_size; /* HW RSS table size */ @@ -445,6 +448,20 @@ struct i40e_pf { u16 phy_led_val; }; +/** + * i40e_mac_to_hkey - Convert a 6-byte MAC Address to a u64 hash key + * @macaddr: the MAC Address as the base key + * + * Simply copies the address and returns it as a u64 for hashing + **/ +static inline u64 i40e_addr_to_hkey(const u8 *macaddr) +{ + u64 key = 0; + + ether_addr_copy((u8 *)&key, macaddr); + return key; +} + enum i40e_filter_state { I40E_FILTER_INVALID = 0, /* Invalid state */ I40E_FILTER_NEW, /* New, not sent to FW yet */ @@ -454,13 +471,10 @@ enum i40e_filter_state { /* There is no 'removed' state; the filter struct is freed */ }; struct i40e_mac_filter { - struct list_head list; + struct hlist_node hlist; u8 macaddr[ETH_ALEN]; #define I40E_VLAN_ANY -1 s16 vlan; - u8 counter; /* number of instances of this filter */ - bool is_vf; /* filter belongs to a VF */ - bool is_netdev; /* filter belongs to a netdev */ enum i40e_filter_state state; }; @@ -501,9 +515,11 @@ struct i40e_vsi { #define I40E_VSI_FLAG_VEB_OWNER BIT(1) unsigned long flags; - /* Per VSI lock to protect elements/list (MAC filter) */ - spinlock_t mac_filter_list_lock; - struct list_head mac_filter_list; + /* Per VSI lock to protect elements/hash (MAC filter) */ + spinlock_t mac_filter_hash_lock; + /* Fixed size hash table with 2^8 buckets for MAC filters */ + DECLARE_HASHTABLE(mac_filter_hash, 8); + bool has_vlan_filter; /* VSI stats */ struct rtnl_link_stats64 net_stats; @@ -608,6 +624,8 @@ struct i40e_q_vector { unsigned long hung_detected; /* Set/Reset for hung_detection logic */ cpumask_t affinity_mask; + struct irq_affinity_notify affinity_notify; + struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; @@ -705,6 +723,25 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, u16 rss_table_size, u16 rss_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); +/** + * i40e_find_vsi_by_type - Find and return Flow Director VSI + * @pf: PF to search for VSI + * @type: Value indicating type of VSI we are looking for + **/ +static inline struct i40e_vsi * +i40e_find_vsi_by_type(struct i40e_pf *pf, u16 type) +{ + int i; + + for (i = 0; i < pf->num_alloc_vsi; i++) { + struct i40e_vsi *vsi = pf->vsi[i]; + + if (vsi && vsi->type == type) + return vsi; + } + + return NULL; +} void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi); @@ -721,16 +758,12 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf); bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features); void i40e_set_ethtool_ops(struct net_device *netdev); struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, - u8 *macaddr, s16 vlan, - bool is_vf, bool is_netdev); -void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan, - bool is_vf, bool is_netdev); + const u8 *macaddr, s16 vlan); +void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan); int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); int i40e_vsi_release(struct i40e_vsi *vsi); -struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, enum i40e_vsi_type type, - struct i40e_vsi *start_vsi); #ifdef I40E_FCOE void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt, @@ -740,7 +773,8 @@ void i40e_service_event_schedule(struct i40e_pf *pf); void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len); -int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable); +int i40e_vsi_start_rings(struct i40e_vsi *vsi); +void i40e_vsi_stop_rings(struct i40e_vsi *vsi); int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count); struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc); @@ -816,14 +850,12 @@ int i40e_close(struct net_device *netdev); int i40e_vsi_open(struct i40e_vsi *vsi); void i40e_vlan_stripping_disable(struct i40e_vsi *vsi); int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid); -int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid); -struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev); -int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev); +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid); +struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, + const u8 *macaddr); +int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr); bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); -struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev); +struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); #ifdef I40E_FCOE int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, struct tc_to_netdev *tc); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 738b42a44f20..56fb27298936 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -964,11 +964,11 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc); desc_idx = ntc; + hw->aq.arq_last_status = + (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & I40E_AQ_FLAG_ERR) { ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.arq_last_status = - (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 250db0b244b7..7fe72abc0b4a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -287,6 +287,7 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset) } cdev->client->ops->close(&cdev->lan_info, cdev->client, reset); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); i40e_client_release_qvlist(&cdev->lan_info); } } @@ -406,37 +407,6 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id, } /** - * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi - * @pf: board private structure - * @type: vsi type - * @start_vsi: a VSI pointer from where to start the search - * - * Returns non NULL on success or NULL for failure - **/ -struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, - enum i40e_vsi_type type, - struct i40e_vsi *start_vsi) -{ - struct i40e_vsi *vsi; - int i = 0; - - if (start_vsi) { - for (i = 0; i < pf->num_alloc_vsi; i++) { - vsi = pf->vsi[i]; - if (vsi == start_vsi) - break; - } - } - for (; i < pf->num_alloc_vsi; i++) { - vsi = pf->vsi[i]; - if (vsi && vsi->type == type) - return vsi; - } - - return NULL; -} - -/** * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct * @client: pointer to a client struct in the client list. @@ -565,7 +535,7 @@ void i40e_client_subtask(struct i40e_pf *pf) if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state)) continue; } else { - dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n", + dev_warn(&pf->pdev->dev, "This client %s is being instantiated at probe\n", client->name); } @@ -575,29 +545,25 @@ void i40e_client_subtask(struct i40e_pf *pf) continue; if (!existing) { - /* Also up the ref_cnt for no. of instances of this - * client. - */ - atomic_inc(&client->ref_cnt); dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", client->name, pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); } mutex_lock(&i40e_client_instance_mutex); - /* Send an Open request to the client */ - atomic_inc(&cdev->ref_cnt); - if (client->ops && client->ops->open) - ret = client->ops->open(&cdev->lan_info, client); - atomic_dec(&cdev->ref_cnt); - if (!ret) { - set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); - } else { - /* remove client instance */ - mutex_unlock(&i40e_client_instance_mutex); - i40e_client_del_instance(pf, client); - atomic_dec(&client->ref_cnt); - continue; + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + /* Send an Open request to the client */ + if (client->ops && client->ops->open) + ret = client->ops->open(&cdev->lan_info, + client); + if (!ret) { + set_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state); + } else { + /* remove client instance */ + i40e_client_del_instance(pf, client); + } } mutex_unlock(&i40e_client_instance_mutex); } @@ -694,10 +660,6 @@ static int i40e_client_release(struct i40e_client *client) continue; pf = (struct i40e_pf *)cdev->lan_info.pf; if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { - if (atomic_read(&cdev->ref_cnt) > 0) { - ret = I40E_ERR_NOT_READY; - goto out; - } if (client->ops && client->ops->close) client->ops->close(&cdev->lan_info, client, false); @@ -710,11 +672,9 @@ static int i40e_client_release(struct i40e_client *client) } /* delete the client instance from the list */ list_move(&cdev->list, &cdevs_tmp); - atomic_dec(&client->ref_cnt); dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n", client->name); } -out: mutex_unlock(&i40e_client_instance_mutex); /* free the client device and release its vsi */ @@ -1040,17 +1000,10 @@ int i40e_unregister_client(struct i40e_client *client) ret = -ENODEV; goto out; } - if (atomic_read(&client->ref_cnt) == 0) { - clear_bit(__I40E_CLIENT_REGISTERED, &client->state); - list_del(&client->list); - pr_info("i40e: Unregistered client %s with return code %d\n", - client->name, ret); - } else { - ret = I40E_ERR_NOT_READY; - pr_err("i40e: Client %s failed unregister - client has open instances\n", - client->name); - } - + clear_bit(__I40E_CLIENT_REGISTERED, &client->state); + list_del(&client->list); + pr_info("i40e: Unregistered client %s with return code %d\n", + client->name, ret); out: mutex_unlock(&i40e_client_mutex); return ret; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index 38a6c36a6a0e..528bd79b05fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -203,8 +203,6 @@ struct i40e_client_instance { struct i40e_info lan_info; struct i40e_client *client; unsigned long state; - /* A count of all the in-progress calls to the client */ - atomic_t ref_cnt; }; struct i40e_client { diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 2154a34c1dd8..98791ba57211 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1849,7 +1849,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, else hw_link_info->crc_enable = false; - if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_ENABLE)) + if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_IS_ENABLED)) hw_link_info->lse_enable = true; else hw_link_info->lse_enable = false; @@ -2494,7 +2494,10 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) if (status) return status; - if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) { + /* extra checking needed to ensure link info to user is timely */ + if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) && + ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) || + !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) { status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (status) @@ -3310,8 +3313,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, /* partition id is 1-based, and functions are evenly spread * across the ports as partitions */ - hw->partition_id = (hw->pf_id / hw->num_ports) + 1; - hw->num_partitions = num_functions / hw->num_ports; + if (hw->num_ports != 0) { + hw->partition_id = (hw->pf_id / hw->num_ports) + 1; + hw->num_partitions = num_functions / hw->num_ports; + } /* additional HW specific goodies that might * someday be HW version specific diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 0c1875b5b16d..b8a03a05c4e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -134,7 +134,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) struct rtnl_link_stats64 *nstat; struct i40e_mac_filter *f; struct i40e_vsi *vsi; - int i; + int i, bkt; vsi = i40e_dbg_find_vsi(pf, seid); if (!vsi) { @@ -166,11 +166,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) pf->hw.mac.addr, pf->hw.mac.san_addr, pf->hw.mac.port_addr); - list_for_each_entry(f, &vsi->mac_filter_list, list) { + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { dev_info(&pf->pdev->dev, - " mac_filter_list: %pM vid=%d, is_netdev=%d is_vf=%d counter=%d, state %s\n", - f->macaddr, f->vlan, f->is_netdev, f->is_vf, - f->counter, i40e_filter_state_string[f->state]); + " mac_filter_hash: %pM vid=%d, state %s\n", + f->macaddr, f->vlan, + i40e_filter_state_string[f->state]); } dev_info(&pf->pdev->dev, " active_filters %d, promisc_threshold %d, overflow promisc %s\n", vsi->active_filters, vsi->promisc_threshold, @@ -867,86 +867,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid); i40e_veb_release(pf->veb[i]); - - } else if (strncmp(cmd_buf, "add macaddr", 11) == 0) { - struct i40e_mac_filter *f; - int vlan = 0; - u8 ma[6]; - int ret; - - cnt = sscanf(&cmd_buf[11], - "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i", - &vsi_seid, - &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5], - &vlan); - if (cnt == 7) { - vlan = 0; - } else if (cnt != 8) { - dev_info(&pf->pdev->dev, - "add macaddr: bad command string, cnt=%d\n", - cnt); - goto command_write_done; - } - - vsi = i40e_dbg_find_vsi(pf, vsi_seid); - if (!vsi) { - dev_info(&pf->pdev->dev, - "add macaddr: VSI %d not found\n", vsi_seid); - goto command_write_done; - } - - spin_lock_bh(&vsi->mac_filter_list_lock); - f = i40e_add_filter(vsi, ma, vlan, false, false); - spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi); - if (f && !ret) - dev_info(&pf->pdev->dev, - "add macaddr: %pM vlan=%d added to VSI %d\n", - ma, vlan, vsi_seid); - else - dev_info(&pf->pdev->dev, - "add macaddr: %pM vlan=%d to VSI %d failed, f=%p ret=%d\n", - ma, vlan, vsi_seid, f, ret); - - } else if (strncmp(cmd_buf, "del macaddr", 11) == 0) { - int vlan = 0; - u8 ma[6]; - int ret; - - cnt = sscanf(&cmd_buf[11], - "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i", - &vsi_seid, - &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5], - &vlan); - if (cnt == 7) { - vlan = 0; - } else if (cnt != 8) { - dev_info(&pf->pdev->dev, - "del macaddr: bad command string, cnt=%d\n", - cnt); - goto command_write_done; - } - - vsi = i40e_dbg_find_vsi(pf, vsi_seid); - if (!vsi) { - dev_info(&pf->pdev->dev, - "del macaddr: VSI %d not found\n", vsi_seid); - goto command_write_done; - } - - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_del_filter(vsi, ma, vlan, false, false); - spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi); - if (!ret) - dev_info(&pf->pdev->dev, - "del macaddr: %pM vlan=%d removed from VSI %d\n", - ma, vlan, vsi_seid); - else - dev_info(&pf->pdev->dev, - "del macaddr: %pM vlan=%d from VSI %d failed, ret=%d\n", - ma, vlan, vsi_seid, ret); - } else if (strncmp(cmd_buf, "add pvid", 8) == 0) { i40e_status ret; u16 vid; @@ -1210,24 +1130,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, "dump debug fwdata <cluster_id> <table_id> <index>\n"); } - - } else if (strncmp(cmd_buf, "msg_enable", 10) == 0) { - u32 level; - cnt = sscanf(&cmd_buf[10], "%i", &level); - if (cnt) { - if (I40E_DEBUG_USER & level) { - pf->hw.debug_mask = level; - dev_info(&pf->pdev->dev, - "set hw.debug_mask = 0x%08x\n", - pf->hw.debug_mask); - } - pf->msg_enable = level; - dev_info(&pf->pdev->dev, "set msg_enable = 0x%08x\n", - pf->msg_enable); - } else { - dev_info(&pf->pdev->dev, "msg_enable = 0x%08x\n", - pf->msg_enable); - } } else if (strncmp(cmd_buf, "pfr", 3) == 0) { dev_info(&pf->pdev->dev, "debugfs: forcing PFR\n"); i40e_do_reset_safe(pf, BIT(__I40E_PF_RESET_REQUESTED)); @@ -1633,8 +1535,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " del vsi [vsi_seid]\n"); dev_info(&pf->pdev->dev, " add relay <uplink_seid> <vsi_seid>\n"); dev_info(&pf->pdev->dev, " del relay <relay_seid>\n"); - dev_info(&pf->pdev->dev, " add macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n"); - dev_info(&pf->pdev->dev, " del macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n"); dev_info(&pf->pdev->dev, " add pvid <vsi_seid> <vid>\n"); dev_info(&pf->pdev->dev, " del pvid <vsi_seid>\n"); dev_info(&pf->pdev->dev, " dump switch\n"); @@ -1644,7 +1544,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " dump desc aq\n"); dev_info(&pf->pdev->dev, " dump reset stats\n"); dev_info(&pf->pdev->dev, " dump debug fwdata <cluster_id> <table_id> <index>\n"); - dev_info(&pf->pdev->dev, " msg_enable [level]\n"); dev_info(&pf->pdev->dev, " read <reg>\n"); dev_info(&pf->pdev->dev, " write <reg> <value>\n"); dev_info(&pf->pdev->dev, " clear_stats vsi [seid]\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 92bc8846f1ba..b9e1162d927f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -104,7 +104,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { * The PF_STATs are appended to the netdev stats only when ethtool -S * is queried on the base PF netdev, not on the VMDq or FCoE netdev. */ -static struct i40e_stats i40e_gstrings_stats[] = { +static const struct i40e_stats i40e_gstrings_stats[] = { I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes), I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes), I40E_PF_STAT("rx_unicast", stats.eth.rx_unicast), @@ -216,7 +216,6 @@ enum i40e_ethtool_test_id { I40E_ETH_TEST_REG = 0, I40E_ETH_TEST_EEPROM, I40E_ETH_TEST_INTR, - I40E_ETH_TEST_LOOPBACK, I40E_ETH_TEST_LINK, }; @@ -224,7 +223,6 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = { "Register test (offline)", "Eeprom test (offline)", "Interrupt test (offline)", - "Loopback test (offline)", "Link test (on/offline)" }; @@ -978,6 +976,10 @@ static u32 i40e_get_msglevel(struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; + u32 debug_mask = pf->hw.debug_mask; + + if (debug_mask) + netdev_info(netdev, "i40e debug_mask: 0x%08X\n", debug_mask); return pf->msg_enable; } @@ -989,7 +991,8 @@ static void i40e_set_msglevel(struct net_device *netdev, u32 data) if (I40E_DEBUG_USER & data) pf->hw.debug_mask = data; - pf->msg_enable = data; + else + pf->msg_enable = data; } static int i40e_get_regs_len(struct net_device *netdev) @@ -1739,17 +1742,6 @@ static int i40e_intr_test(struct net_device *netdev, u64 *data) return *data; } -static int i40e_loopback_test(struct net_device *netdev, u64 *data) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_pf *pf = np->vsi->back; - - netif_info(pf, hw, netdev, "loopback test not implemented\n"); - *data = 0; - - return *data; -} - static inline bool i40e_active_vfs(struct i40e_pf *pf) { struct i40e_vf *vfs = pf->vf; @@ -1763,17 +1755,7 @@ static inline bool i40e_active_vfs(struct i40e_pf *pf) static inline bool i40e_active_vmdqs(struct i40e_pf *pf) { - struct i40e_vsi **vsi = pf->vsi; - int i; - - for (i = 0; i < pf->num_alloc_vsi; i++) { - if (!vsi[i]) - continue; - if (vsi[i]->type == I40E_VSI_VMDQ2) - return true; - } - - return false; + return !!i40e_find_vsi_by_type(pf, I40E_VSI_VMDQ2); } static void i40e_diag_test(struct net_device *netdev, @@ -1795,7 +1777,6 @@ static void i40e_diag_test(struct net_device *netdev, data[I40E_ETH_TEST_REG] = 1; data[I40E_ETH_TEST_EEPROM] = 1; data[I40E_ETH_TEST_INTR] = 1; - data[I40E_ETH_TEST_LOOPBACK] = 1; data[I40E_ETH_TEST_LINK] = 1; eth_test->flags |= ETH_TEST_FL_FAILED; clear_bit(__I40E_TESTING, &pf->state); @@ -1823,9 +1804,6 @@ static void i40e_diag_test(struct net_device *netdev, if (i40e_intr_test(netdev, &data[I40E_ETH_TEST_INTR])) eth_test->flags |= ETH_TEST_FL_FAILED; - if (i40e_loopback_test(netdev, &data[I40E_ETH_TEST_LOOPBACK])) - eth_test->flags |= ETH_TEST_FL_FAILED; - /* run reg test last, a reset is required after it */ if (i40e_reg_test(netdev, &data[I40E_ETH_TEST_REG])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1846,7 +1824,6 @@ static void i40e_diag_test(struct net_device *netdev, data[I40E_ETH_TEST_REG] = 0; data[I40E_ETH_TEST_EEPROM] = 0; data[I40E_ETH_TEST_INTR] = 0; - data[I40E_ETH_TEST_LOOPBACK] = 0; } skip_ol_tests: diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 58e6c1570335..b077ef8b00fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1522,12 +1522,12 @@ void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi) * same PCI function. */ netdev->dev_port = 1; - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_add_filter(vsi, hw->mac.san_addr, 0, false, false); - i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0, false, false); - i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0, false, false); - i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0, false, false); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); + i40e_add_filter(vsi, hw->mac.san_addr, 0); + i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0); + i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0); + i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* use san mac */ ether_addr_copy(netdev->dev_addr, hw->mac.san_addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 31c97e3937a4..5c6a5ceb8a91 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 16 +#define DRV_VERSION_BUILD 21 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -93,8 +93,8 @@ MODULE_DEVICE_TABLE(pci, i40e_pci_tbl); #define I40E_MAX_VF_COUNT 128 static int debug = -1; -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +module_param(debug, uint, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)"); MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); @@ -1145,25 +1145,22 @@ void i40e_update_stats(struct i40e_vsi *vsi) * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan - * @is_vf: make sure its a VF filter, else doesn't matter - * @is_netdev: make sure its a netdev filter, else doesn't matter * * Returns ptr to the filter object or NULL **/ static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi, - u8 *macaddr, s16 vlan, - bool is_vf, bool is_netdev) + const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; + u64 key; if (!vsi || !macaddr) return NULL; - list_for_each_entry(f, &vsi->mac_filter_list, list) { + key = i40e_addr_to_hkey(macaddr); + hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) { if ((ether_addr_equal(macaddr, f->macaddr)) && - (vlan == f->vlan) && - (!is_vf || f->is_vf) && - (!is_netdev || f->is_netdev)) + (vlan == f->vlan)) return f; } return NULL; @@ -1173,24 +1170,21 @@ static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi, * i40e_find_mac - Find a mac addr in the macvlan filters list * @vsi: the VSI to be searched * @macaddr: the MAC address we are searching for - * @is_vf: make sure its a VF filter, else doesn't matter - * @is_netdev: make sure its a netdev filter, else doesn't matter * * Returns the first filter with the provided MAC address or NULL if * MAC address was not found **/ -struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev) +struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr) { struct i40e_mac_filter *f; + u64 key; if (!vsi || !macaddr) return NULL; - list_for_each_entry(f, &vsi->mac_filter_list, list) { - if ((ether_addr_equal(macaddr, f->macaddr)) && - (!is_vf || f->is_vf) && - (!is_netdev || f->is_netdev)) + key = i40e_addr_to_hkey(macaddr); + hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) { + if ((ether_addr_equal(macaddr, f->macaddr))) return f; } return NULL; @@ -1204,119 +1198,31 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, **/ bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi) { - struct i40e_mac_filter *f; + /* If we have a PVID, always operate in VLAN mode */ + if (vsi->info.pvid) + return true; - /* Only -1 for all the filters denotes not in vlan mode - * so we have to go through all the list in order to make sure + /* We need to operate in VLAN mode whenever we have any filters with + * a VLAN other than I40E_VLAN_ALL. We could check the table each + * time, incurring search cost repeatedly. However, we can notice two + * things: + * + * 1) the only place where we can gain a VLAN filter is in + * i40e_add_filter. + * + * 2) the only place where filters are actually removed is in + * i40e_vsi_sync_filters_subtask. + * + * Thus, we can simply use a boolean value, has_vlan_filters which we + * will set to true when we add a VLAN filter in i40e_add_filter. Then + * we have to perform the full search after deleting filters in + * i40e_vsi_sync_filters_subtask, but we already have to search + * filters here and can perform the check at the same time. This + * results in avoiding embedding a loop for VLAN mode inside another + * loop over all the filters, and should maintain correctness as noted + * above. */ - list_for_each_entry(f, &vsi->mac_filter_list, list) { - if (f->vlan >= 0 || vsi->info.pvid) - return true; - } - - return false; -} - -/** - * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans - * @vsi: the VSI to be searched - * @macaddr: the mac address to be filtered - * @is_vf: true if it is a VF - * @is_netdev: true if it is a netdev - * - * Goes through all the macvlan filters and adds a - * macvlan filter for each unique vlan that already exists - * - * Returns first filter found on success, else NULL - **/ -struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev) -{ - struct i40e_mac_filter *f; - - list_for_each_entry(f, &vsi->mac_filter_list, list) { - if (vsi->info.pvid) - f->vlan = le16_to_cpu(vsi->info.pvid); - if (!i40e_find_filter(vsi, macaddr, f->vlan, - is_vf, is_netdev)) { - if (!i40e_add_filter(vsi, macaddr, f->vlan, - is_vf, is_netdev)) - return NULL; - } - } - - return list_first_entry_or_null(&vsi->mac_filter_list, - struct i40e_mac_filter, list); -} - -/** - * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS - * @vsi: the VSI to be searched - * @macaddr: the mac address to be removed - * @is_vf: true if it is a VF - * @is_netdev: true if it is a netdev - * - * Removes a given MAC address from a VSI, regardless of VLAN - * - * Returns 0 for success, or error - **/ -int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr, - bool is_vf, bool is_netdev) -{ - struct i40e_mac_filter *f = NULL; - int changed = 0; - - WARN(!spin_is_locked(&vsi->mac_filter_list_lock), - "Missing mac_filter_list_lock\n"); - list_for_each_entry(f, &vsi->mac_filter_list, list) { - if ((ether_addr_equal(macaddr, f->macaddr)) && - (is_vf == f->is_vf) && - (is_netdev == f->is_netdev)) { - f->counter--; - changed = 1; - if (f->counter == 0) - f->state = I40E_FILTER_REMOVE; - } - } - if (changed) { - vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; - return 0; - } - return -ENOENT; -} - -/** - * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM - * @vsi: the PF Main VSI - inappropriate for any other VSI - * @macaddr: the MAC address - * - * Remove whatever filter the firmware set up so the driver can manage - * its own filtering intelligently. - **/ -static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) -{ - struct i40e_aqc_remove_macvlan_element_data element; - struct i40e_pf *pf = vsi->back; - - /* Only appropriate for the PF main VSI */ - if (vsi->type != I40E_VSI_MAIN) - return; - - memset(&element, 0, sizeof(element)); - ether_addr_copy(element.mac_addr, macaddr); - element.vlan_tag = 0; - /* Ignore error returns, some firmware does it this way... */ - element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; - i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); - - memset(&element, 0, sizeof(element)); - ether_addr_copy(element.mac_addr, macaddr); - element.vlan_tag = 0; - /* ...and some firmware does it this way. */ - element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | - I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; - i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); + return vsi->has_vlan_filter; } /** @@ -1324,20 +1230,17 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan - * @is_vf: make sure its a VF filter, else doesn't matter - * @is_netdev: make sure its a netdev filter, else doesn't matter * * Returns ptr to the filter object or NULL when no memory available. * - * NOTE: This function is expected to be called with mac_filter_list_lock + * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. **/ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, - u8 *macaddr, s16 vlan, - bool is_vf, bool is_netdev) + const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; - int changed = false; + u64 key; if (!vsi || !macaddr) return NULL; @@ -1349,11 +1252,17 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, if (is_broadcast_ether_addr(macaddr)) return NULL; - f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev); + f = i40e_find_filter(vsi, macaddr, vlan); if (!f) { f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) - goto add_filter_out; + return NULL; + + /* Update the boolean indicating if we need to function in + * VLAN mode. + */ + if (vlan >= 0) + vsi->has_vlan_filter = true; ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; @@ -1365,100 +1274,148 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, f->state = I40E_FILTER_FAILED; else f->state = I40E_FILTER_NEW; - changed = true; - INIT_LIST_HEAD(&f->list); - list_add_tail(&f->list, &vsi->mac_filter_list); - } + INIT_HLIST_NODE(&f->hlist); - /* increment counter and add a new flag if needed */ - if (is_vf) { - if (!f->is_vf) { - f->is_vf = true; - f->counter++; - } - } else if (is_netdev) { - if (!f->is_netdev) { - f->is_netdev = true; - f->counter++; - } - } else { - f->counter++; - } + key = i40e_addr_to_hkey(macaddr); + hash_add(vsi->mac_filter_hash, &f->hlist, key); - if (changed) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } -add_filter_out: + /* If we're asked to add a filter that has been marked for removal, it + * is safe to simply restore it to active state. __i40e_del_filter + * will have simply deleted any filters which were previously marked + * NEW or FAILED, so if it is currently marked REMOVE it must have + * previously been ACTIVE. Since we haven't yet run the sync filters + * task, just restore this filter to the ACTIVE state so that the + * sync task leaves it in place + */ + if (f->state == I40E_FILTER_REMOVE) + f->state = I40E_FILTER_ACTIVE; + return f; } /** - * i40e_del_filter - Remove a mac/vlan filter from the VSI + * __i40e_del_filter - Remove a specific filter from the VSI + * @vsi: VSI to remove from + * @f: the filter to remove from the list + * + * This function should be called instead of i40e_del_filter only if you know + * the exact filter you will remove already, such as via i40e_find_filter or + * i40e_find_mac. + * + * NOTE: This function is expected to be called with mac_filter_hash_lock + * being held. + * ANOTHER NOTE: This function MUST be called from within the context of + * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() + * instead of list_for_each_entry(). + **/ +static void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) +{ + if (!f) + return; + + if ((f->state == I40E_FILTER_FAILED) || + (f->state == I40E_FILTER_NEW)) { + /* this one never got added by the FW. Just remove it, + * no need to sync anything. + */ + hash_del(&f->hlist); + kfree(f); + } else { + f->state = I40E_FILTER_REMOVE; + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + vsi->back->flags |= I40E_FLAG_FILTER_SYNC; + } +} + +/** + * i40e_del_filter - Remove a MAC/VLAN filter from the VSI * @vsi: the VSI to be searched * @macaddr: the MAC address - * @vlan: the vlan - * @is_vf: make sure it's a VF filter, else doesn't matter - * @is_netdev: make sure it's a netdev filter, else doesn't matter + * @vlan: the VLAN * - * NOTE: This function is expected to be called with mac_filter_list_lock + * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. * ANOTHER NOTE: This function MUST be called from within the context of * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() * instead of list_for_each_entry(). **/ -void i40e_del_filter(struct i40e_vsi *vsi, - u8 *macaddr, s16 vlan, - bool is_vf, bool is_netdev) +void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; if (!vsi || !macaddr) return; - f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev); - if (!f || f->counter == 0) - return; + f = i40e_find_filter(vsi, macaddr, vlan); + __i40e_del_filter(vsi, f); +} - if (is_vf) { - if (f->is_vf) { - f->is_vf = false; - f->counter--; - } - } else if (is_netdev) { - if (f->is_netdev) { - f->is_netdev = false; - f->counter--; - } - } else { - /* make sure we don't remove a filter in use by VF or netdev */ - int min_f = 0; +/** + * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans + * @vsi: the VSI to be searched + * @macaddr: the mac address to be filtered + * + * Goes through all the macvlan filters and adds a macvlan filter for each + * unique vlan that already exists. If a PVID has been assigned, instead only + * add the macaddr to that VLAN. + * + * Returns last filter added on success, else NULL + **/ +struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, + const u8 *macaddr) +{ + struct i40e_mac_filter *f, *add = NULL; + struct hlist_node *h; + int bkt; - min_f += (f->is_vf ? 1 : 0); - min_f += (f->is_netdev ? 1 : 0); + if (vsi->info.pvid) + return i40e_add_filter(vsi, macaddr, + le16_to_cpu(vsi->info.pvid)); - if (f->counter > min_f) - f->counter--; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_REMOVE) + continue; + add = i40e_add_filter(vsi, macaddr, f->vlan); + if (!add) + return NULL; } - /* counter == 0 tells sync_filters_subtask to - * remove the filter from the firmware's list - */ - if (f->counter == 0) { - if ((f->state == I40E_FILTER_FAILED) || - (f->state == I40E_FILTER_NEW)) { - /* this one never got added by the FW. Just remove it, - * no need to sync anything. - */ - list_del(&f->list); - kfree(f); - } else { - f->state = I40E_FILTER_REMOVE; - vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; + return add; +} + +/** + * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS + * @vsi: the VSI to be searched + * @macaddr: the mac address to be removed + * + * Removes a given MAC address from a VSI, regardless of VLAN + * + * Returns 0 for success, or error + **/ +int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + bool found = false; + int bkt; + + WARN(!spin_is_locked(&vsi->mac_filter_hash_lock), + "Missing mac_filter_hash_lock\n"); + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (ether_addr_equal(macaddr, f->macaddr)) { + __i40e_del_filter(vsi, f); + found = true; } } + + if (found) + return 0; + else + return -ENOENT; } /** @@ -1499,10 +1456,10 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_del_mac_all_vlan(vsi, netdev->dev_addr, false, true); - i40e_put_mac_in_vlan(vsi, addr->sa_data, false, true); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); + i40e_del_mac_all_vlan(vsi, netdev->dev_addr); + i40e_put_mac_in_vlan(vsi, addr->sa_data); + spin_unlock_bh(&vsi->mac_filter_hash_lock); ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; @@ -1666,6 +1623,52 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } /** + * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40e_addr_sync(struct net_device *netdev, const u8 *addr) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_mac_filter *f; + + if (i40e_is_vsi_in_vlan(vsi)) + f = i40e_put_mac_in_vlan(vsi, addr); + else + f = i40e_add_filter(vsi, addr, I40E_VLAN_ANY); + + if (f) + return 0; + else + return -ENOMEM; +} + +/** + * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + + if (i40e_is_vsi_in_vlan(vsi)) + i40e_del_mac_all_vlan(vsi, addr); + else + i40e_del_filter(vsi, addr, I40E_VLAN_ANY); + + return 0; +} + +/** * i40e_set_rx_mode - NDO callback to set the netdev filters * @netdev: network interface device structure **/ @@ -1676,62 +1679,14 @@ static void i40e_set_rx_mode(struct net_device *netdev) #endif { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_mac_filter *f, *ftmp; struct i40e_vsi *vsi = np->vsi; - struct netdev_hw_addr *uca; - struct netdev_hw_addr *mca; - struct netdev_hw_addr *ha; - - spin_lock_bh(&vsi->mac_filter_list_lock); - - /* add addr if not already in the filter list */ - netdev_for_each_uc_addr(uca, netdev) { - if (!i40e_find_mac(vsi, uca->addr, false, true)) { - if (i40e_is_vsi_in_vlan(vsi)) - i40e_put_mac_in_vlan(vsi, uca->addr, - false, true); - else - i40e_add_filter(vsi, uca->addr, I40E_VLAN_ANY, - false, true); - } - } - - netdev_for_each_mc_addr(mca, netdev) { - if (!i40e_find_mac(vsi, mca->addr, false, true)) { - if (i40e_is_vsi_in_vlan(vsi)) - i40e_put_mac_in_vlan(vsi, mca->addr, - false, true); - else - i40e_add_filter(vsi, mca->addr, I40E_VLAN_ANY, - false, true); - } - } - /* remove filter if not in netdev list */ - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { - - if (!f->is_netdev) - continue; + spin_lock_bh(&vsi->mac_filter_hash_lock); - netdev_for_each_mc_addr(mca, netdev) - if (ether_addr_equal(mca->addr, f->macaddr)) - goto bottom_of_search_loop; + __dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync); + __dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync); - netdev_for_each_uc_addr(uca, netdev) - if (ether_addr_equal(uca->addr, f->macaddr)) - goto bottom_of_search_loop; - - for_each_dev_addr(netdev, ha) - if (ether_addr_equal(ha->addr, f->macaddr)) - goto bottom_of_search_loop; - - /* f->macaddr wasn't found in uc, mc, or ha list so delete it */ - i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, false, true); - -bottom_of_search_loop: - continue; - } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* check for other flag changes */ if (vsi->current_netdev_flags != vsi->netdev->flags) { @@ -1746,21 +1701,26 @@ bottom_of_search_loop: } /** - * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries - * @vsi: pointer to vsi struct + * i40e_undo_filter_entries - Undo the changes made to MAC filter entries + * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * - * MAC filter entries from list were slated to be removed from device. + * MAC filter entries from list were slated to be sent to firmware, either for + * addition or deletion. **/ -static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, - struct list_head *from) +static void i40e_undo_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) { - struct i40e_mac_filter *f, *ftmp; + struct i40e_mac_filter *f; + struct hlist_node *h; + + hlist_for_each_entry_safe(f, h, from, hlist) { + u64 key = i40e_addr_to_hkey(f->macaddr); - list_for_each_entry_safe(f, ftmp, from, list) { /* Move the element back into MAC filter list*/ - list_move_tail(&f->list, &vsi->mac_filter_list); + hlist_del(&f->hlist); + hash_add(vsi->mac_filter_hash, &f->hlist, key); } } @@ -1789,7 +1749,9 @@ i40e_update_filter_state(int count, /* Everything's good, mark all filters active. */ for (i = 0; i < count ; i++) { add_head->state = I40E_FILTER_ACTIVE; - add_head = list_next_entry(add_head, list); + add_head = hlist_entry(add_head->hlist.next, + typeof(struct i40e_mac_filter), + hlist); } } else if (aq_err == I40E_AQ_RC_ENOSPC) { /* Device ran out of filter space. Check the return value @@ -1803,20 +1765,98 @@ i40e_update_filter_state(int count, add_head->state = I40E_FILTER_ACTIVE; retval++; } - add_head = list_next_entry(add_head, list); + add_head = hlist_entry(add_head->hlist.next, + typeof(struct i40e_mac_filter), + hlist); } } else { /* Some other horrible thing happened, fail all filters */ retval = 0; for (i = 0; i < count ; i++) { add_head->state = I40E_FILTER_FAILED; - add_head = list_next_entry(add_head, list); + add_head = hlist_entry(add_head->hlist.next, + typeof(struct i40e_mac_filter), + hlist); } } return retval; } /** + * i40e_aqc_del_filters - Request firmware to delete a set of filters + * @vsi: ptr to the VSI + * @vsi_name: name to display in messages + * @list: the list of filters to send to firmware + * @num_del: the number of filters to delete + * @retval: Set to -EIO on failure to delete + * + * Send a request to firmware via AdminQ to delete a set of filters. Uses + * *retval instead of a return value so that success does not force ret_val to + * be set to 0. This ensures that a sequence of calls to this function + * preserve the previous value of *retval on successful delete. + */ +static +void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_aqc_remove_macvlan_element_data *list, + int num_del, int *retval) +{ + struct i40e_hw *hw = &vsi->back->hw; + i40e_status aq_ret; + int aq_err; + + aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); + aq_err = hw->aq.asq_last_status; + + /* Explicitly ignore and do not report when firmware returns ENOENT */ + if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + *retval = -EIO; + dev_info(&vsi->back->pdev->dev, + "ignoring delete macvlan error on %s, err %s, aq_err %s\n", + vsi_name, i40e_stat_str(hw, aq_ret), + i40e_aq_str(hw, aq_err)); + } +} + +/** + * i40e_aqc_add_filters - Request firmware to add a set of filters + * @vsi: ptr to the VSI + * @vsi_name: name to display in messages + * @list: the list of filters to send to firmware + * @add_head: Position in the add hlist + * @num_add: the number of filters to add + * @promisc_change: set to true on exit if promiscuous mode was forced on + * + * Send a request to firmware via AdminQ to add a chunk of filters. Will set + * promisc_changed to true if the firmware has run out of space for more + * filters. + */ +static +void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_aqc_add_macvlan_element_data *list, + struct i40e_mac_filter *add_head, + int num_add, bool *promisc_changed) +{ + struct i40e_hw *hw = &vsi->back->hw; + i40e_status aq_ret; + int aq_err, fcnt; + + aq_ret = i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); + aq_err = hw->aq.asq_last_status; + fcnt = i40e_update_filter_state(num_add, list, add_head, aq_ret); + vsi->active_filters += fcnt; + + if (fcnt != num_add) { + *promisc_changed = true; + set_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + vsi->promisc_threshold = (vsi->active_filters * 3) / 4; + dev_warn(&vsi->back->pdev->dev, + "Error %s adding RX filters on %s, promiscuous mode forced on\n", + i40e_aq_str(hw, aq_err), + vsi_name); + } +} + +/** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI * @@ -1826,22 +1866,25 @@ i40e_update_filter_state(int count, **/ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { - struct i40e_mac_filter *f, *ftmp, *add_head = NULL; - struct list_head tmp_add_list, tmp_del_list; + struct hlist_head tmp_add_list, tmp_del_list; + struct i40e_mac_filter *f, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; + unsigned int vlan_any_filters = 0; + unsigned int non_vlan_filters = 0; + unsigned int vlan_filters = 0; bool promisc_changed = false; char vsi_name[16] = "PF"; int filter_list_len = 0; - u32 changed_flags = 0; i40e_status aq_ret = 0; - int retval = 0; + u32 changed_flags = 0; + struct hlist_node *h; struct i40e_pf *pf; int num_add = 0; int num_del = 0; - int aq_err = 0; + int retval = 0; u16 cmd_flags; int list_size; - int fcnt; + int bkt; /* empty array typed pointers, kcalloc later */ struct i40e_aqc_add_macvlan_element_data *add_list; @@ -1856,8 +1899,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) vsi->current_netdev_flags = vsi->netdev->flags; } - INIT_LIST_HEAD(&tmp_add_list); - INIT_LIST_HEAD(&tmp_del_list); + INIT_HLIST_HEAD(&tmp_add_list); + INIT_HLIST_HEAD(&tmp_del_list); if (vsi->type == I40E_VSI_SRIOV) snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id); @@ -1867,41 +1910,98 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) { vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED; - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* Create a list of filters to delete. */ - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_REMOVE) { - WARN_ON(f->counter != 0); /* Move the element into temporary del_list */ - list_move_tail(&f->list, &tmp_del_list); + hash_del(&f->hlist); + hlist_add_head(&f->hlist, &tmp_del_list); vsi->active_filters--; + + /* Avoid counting removed filters */ + continue; } if (f->state == I40E_FILTER_NEW) { - WARN_ON(f->counter == 0); - /* Move the element into temporary add_list */ - list_move_tail(&f->list, &tmp_add_list); + hash_del(&f->hlist); + hlist_add_head(&f->hlist, &tmp_add_list); } + + /* Count the number of each type of filter we have + * remaining, ignoring any filters we're about to + * delete. + */ + if (f->vlan > 0) + vlan_filters++; + else if (!f->vlan) + non_vlan_filters++; + else + vlan_any_filters++; } - spin_unlock_bh(&vsi->mac_filter_list_lock); + + /* We should never have VLAN=-1 filters at the same time as we + * have either VLAN=0 or VLAN>0 filters, so warn about this + * case here to help catch any issues. + */ + WARN_ON(vlan_any_filters && (vlan_filters + non_vlan_filters)); + + /* If we only have VLAN=0 filters remaining, and don't have + * any other VLAN filters, we need to convert these VLAN=0 + * filters into VLAN=-1 (I40E_VLAN_ANY) so that we operate + * correctly in non-VLAN mode and receive all traffic tagged + * or untagged. + */ + if (non_vlan_filters && !vlan_filters) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, + hlist) { + /* Only replace VLAN=0 filters */ + if (f->vlan) + continue; + + /* Allocate a replacement element */ + add_head = kzalloc(sizeof(*add_head), + GFP_KERNEL); + if (!add_head) + goto err_no_memory_locked; + + /* Copy the filter, with new state and VLAN */ + *add_head = *f; + add_head->state = I40E_FILTER_NEW; + add_head->vlan = I40E_VLAN_ANY; + + /* Move the replacement to the add list */ + INIT_HLIST_NODE(&add_head->hlist); + hlist_add_head(&add_head->hlist, + &tmp_add_list); + + /* Move the original to the delete list */ + f->state = I40E_FILTER_REMOVE; + hash_del(&f->hlist); + hlist_add_head(&f->hlist, &tmp_del_list); + vsi->active_filters--; + } + + /* Also update any filters on the tmp_add list */ + hlist_for_each_entry(f, &tmp_add_list, hlist) { + if (!f->vlan) + f->vlan = I40E_VLAN_ANY; + } + add_head = NULL; + } + spin_unlock_bh(&vsi->mac_filter_hash_lock); } /* Now process 'del_list' outside the lock */ - if (!list_empty(&tmp_del_list)) { + if (!hlist_empty(&tmp_del_list)) { filter_list_len = hw->aq.asq_buf_size / sizeof(struct i40e_aqc_remove_macvlan_element_data); list_size = filter_list_len * sizeof(struct i40e_aqc_remove_macvlan_element_data); del_list = kzalloc(list_size, GFP_ATOMIC); - if (!del_list) { - /* Undo VSI's MAC filter entry element updates */ - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_undo_del_filter_entries(vsi, &tmp_del_list); - spin_unlock_bh(&vsi->mac_filter_list_lock); - retval = -ENOMEM; - goto out; - } + if (!del_list) + goto err_no_memory; - list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) { + hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) { cmd_flags = 0; /* add to delete list */ @@ -1920,68 +2020,47 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_del == filter_list_len) { - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, - del_list, - num_del, NULL); - aq_err = hw->aq.asq_last_status; - num_del = 0; + i40e_aqc_del_filters(vsi, vsi_name, del_list, + num_del, &retval); memset(del_list, 0, list_size); - - /* Explicitly ignore and do not report when - * firmware returns ENOENT. - */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { - retval = -EIO; - dev_info(&pf->pdev->dev, - "ignoring delete macvlan error on %s, err %s, aq_err %s\n", - vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); - } + num_del = 0; } /* Release memory for MAC filter entries which were * synced up with HW. */ - list_del(&f->list); + hlist_del(&f->hlist); kfree(f); } if (num_del) { - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, del_list, - num_del, NULL); - aq_err = hw->aq.asq_last_status; - num_del = 0; - - /* Explicitly ignore and do not report when firmware - * returns ENOENT. - */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { - retval = -EIO; - dev_info(&pf->pdev->dev, - "ignoring delete macvlan error on %s, err %s aq_err %s\n", - vsi_name, - i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); - } + i40e_aqc_del_filters(vsi, vsi_name, del_list, + num_del, &retval); } kfree(del_list); del_list = NULL; } - if (!list_empty(&tmp_add_list)) { + /* After finishing notifying firmware of the deleted filters, update + * the cached value of vsi->has_vlan_filter. Note that we are safe to + * use just !!vlan_filters here because if we only have VLAN=0 (that + * is, non_vlan_filters) these will all be converted to VLAN=-1 in the + * logic above already so this value would still be correct. + */ + vsi->has_vlan_filter = !!vlan_filters; + + if (!hlist_empty(&tmp_add_list)) { /* Do all the adds now. */ filter_list_len = hw->aq.asq_buf_size / sizeof(struct i40e_aqc_add_macvlan_element_data); list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data); add_list = kzalloc(list_size, GFP_ATOMIC); - if (!add_list) { - retval = -ENOMEM; - goto out; - } + if (!add_list) + goto err_no_memory; + num_add = 0; - list_for_each_entry(f, &tmp_add_list, list) { + hlist_for_each_entry(f, &tmp_add_list, hlist) { if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { f->state = I40E_FILTER_FAILED; @@ -2006,57 +2085,28 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { - aq_ret = i40e_aq_add_macvlan(hw, vsi->seid, - add_list, num_add, - NULL); - aq_err = hw->aq.asq_last_status; - fcnt = i40e_update_filter_state(num_add, - add_list, - add_head, - aq_ret); - vsi->active_filters += fcnt; - - if (fcnt != num_add) { - promisc_changed = true; - set_bit(__I40E_FILTER_OVERFLOW_PROMISC, - &vsi->state); - vsi->promisc_threshold = - (vsi->active_filters * 3) / 4; - dev_warn(&pf->pdev->dev, - "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), - vsi_name); - } + i40e_aqc_add_filters(vsi, vsi_name, add_list, + add_head, num_add, + &promisc_changed); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { - aq_ret = i40e_aq_add_macvlan(hw, vsi->seid, - add_list, num_add, NULL); - aq_err = hw->aq.asq_last_status; - fcnt = i40e_update_filter_state(num_add, add_list, - add_head, aq_ret); - vsi->active_filters += fcnt; - if (fcnt != num_add) { - promisc_changed = true; - set_bit(__I40E_FILTER_OVERFLOW_PROMISC, - &vsi->state); - vsi->promisc_threshold = - (vsi->active_filters * 3) / 4; - dev_warn(&pf->pdev->dev, - "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), vsi_name); - } + i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, + num_add, &promisc_changed); } /* Now move all of the filters from the temp add list back to * the VSI's list. */ - spin_lock_bh(&vsi->mac_filter_list_lock); - list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) { - list_move_tail(&f->list, &vsi->mac_filter_list); + spin_lock_bh(&vsi->mac_filter_hash_lock); + hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { + u64 key = i40e_addr_to_hkey(f->macaddr); + + hlist_del(&f->hlist); + hash_add(vsi->mac_filter_hash, &f->hlist, key); } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); add_list = NULL; } @@ -2068,12 +2118,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* See if we have any failed filters. We can't drop out of * promiscuous until these have all been deleted. */ - spin_lock_bh(&vsi->mac_filter_list_lock); - list_for_each_entry(f, &vsi->mac_filter_list, list) { + spin_lock_bh(&vsi->mac_filter_hash_lock); + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->state == I40E_FILTER_FAILED) failed_count++; } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); if (!failed_count) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", @@ -2201,6 +2251,18 @@ out: clear_bit(__I40E_CONFIG_BUSY, &vsi->state); return retval; + +err_no_memory: + /* Restore elements on the temporary add and delete lists */ + spin_lock_bh(&vsi->mac_filter_hash_lock); +err_no_memory_locked: + i40e_undo_filter_entries(vsi, &tmp_del_list); + i40e_undo_filter_entries(vsi, &tmp_add_list); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + clear_bit(__I40E_CONFIG_BUSY, &vsi->state); + return -ENOMEM; } /** @@ -2239,13 +2301,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) static int i40e_change_mtu(struct net_device *netdev, int new_mtu) { struct i40e_netdev_priv *np = netdev_priv(netdev); - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct i40e_vsi *vsi = np->vsi; - /* MTU < 68 is an error and causes problems on some kernels */ - if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER)) - return -EINVAL; - netdev_info(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); netdev->mtu = new_mtu; @@ -2360,34 +2417,33 @@ static void i40e_vlan_rx_register(struct net_device *netdev, u32 features) **/ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) { - struct i40e_mac_filter *f, *ftmp, *add_f; - bool is_netdev, is_vf; - - is_vf = (vsi->type == I40E_VSI_SRIOV); - is_netdev = !!(vsi->netdev); + struct i40e_mac_filter *f, *add_f, *del_f; + struct hlist_node *h; + int bkt; /* Locked once because all functions invoked below iterates list*/ - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); - if (is_netdev) { - add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid, - is_vf, is_netdev); + if (vsi->netdev) { + add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, "Could not add vlan filter %d for %pM\n", vid, vsi->netdev->dev_addr); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); return -ENOMEM; } } - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { - add_f = i40e_add_filter(vsi, f->macaddr, vid, is_vf, is_netdev); + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_REMOVE) + continue; + add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, "Could not add vlan filter %d for %pM\n", vid, f->macaddr); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); return -ENOMEM; } } @@ -2397,19 +2453,17 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) * with 0, so we now accept untagged and specified tagged traffic * (and not all tags along with untagged) */ - if (vid > 0) { - if (is_netdev && i40e_find_filter(vsi, vsi->netdev->dev_addr, - I40E_VLAN_ANY, - is_vf, is_netdev)) { - i40e_del_filter(vsi, vsi->netdev->dev_addr, - I40E_VLAN_ANY, is_vf, is_netdev); - add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0, - is_vf, is_netdev); + if (vid > 0 && vsi->netdev) { + del_f = i40e_find_filter(vsi, vsi->netdev->dev_addr, + I40E_VLAN_ANY); + if (del_f) { + __i40e_del_filter(vsi, del_f); + add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0); if (!add_f) { dev_info(&vsi->back->pdev->dev, "Could not add filter 0 for %pM\n", vsi->netdev->dev_addr); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); return -ENOMEM; } } @@ -2417,25 +2471,26 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) /* Do not assume that I40E_VLAN_ANY should be reset to VLAN 0 */ if (vid > 0 && !vsi->info.pvid) { - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { - if (!i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY, - is_vf, is_netdev)) + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_REMOVE) continue; - i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, - is_vf, is_netdev); - add_f = i40e_add_filter(vsi, f->macaddr, - 0, is_vf, is_netdev); + del_f = i40e_find_filter(vsi, f->macaddr, + I40E_VLAN_ANY); + if (!del_f) + continue; + __i40e_del_filter(vsi, del_f); + add_f = i40e_add_filter(vsi, f->macaddr, 0); if (!add_f) { dev_info(&vsi->back->pdev->dev, "Could not add filter 0 for %pM\n", f->macaddr); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); return -ENOMEM; } } } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* schedule our worker thread which will take care of * applying the new filter changes @@ -2448,79 +2503,31 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) * i40e_vsi_kill_vlan - Remove vsi membership for given vlan * @vsi: the vsi being configured * @vid: vlan id to be removed (0 = untagged only , -1 = any) - * - * Return: 0 on success or negative otherwise **/ -int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) { struct net_device *netdev = vsi->netdev; - struct i40e_mac_filter *f, *ftmp, *add_f; - bool is_vf, is_netdev; - int filter_count = 0; - - is_vf = (vsi->type == I40E_VSI_SRIOV); - is_netdev = !!(netdev); + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt; /* Locked once because all functions invoked below iterates list */ - spin_lock_bh(&vsi->mac_filter_list_lock); - - if (is_netdev) - i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev); - - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) - i40e_del_filter(vsi, f->macaddr, vid, is_vf, is_netdev); + spin_lock_bh(&vsi->mac_filter_hash_lock); - /* go through all the filters for this VSI and if there is only - * vid == 0 it means there are no other filters, so vid 0 must - * be replaced with -1. This signifies that we should from now - * on accept any traffic (with any tag present, or untagged) - */ - list_for_each_entry(f, &vsi->mac_filter_list, list) { - if (is_netdev) { - if (f->vlan && - ether_addr_equal(netdev->dev_addr, f->macaddr)) - filter_count++; - } + if (vsi->netdev) + i40e_del_filter(vsi, netdev->dev_addr, vid); - if (f->vlan) - filter_count++; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->vlan == vid) + __i40e_del_filter(vsi, f); } - if (!filter_count && is_netdev) { - i40e_del_filter(vsi, netdev->dev_addr, 0, is_vf, is_netdev); - f = i40e_add_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY, - is_vf, is_netdev); - if (!f) { - dev_info(&vsi->back->pdev->dev, - "Could not add filter %d for %pM\n", - I40E_VLAN_ANY, netdev->dev_addr); - spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; - } - } - - if (!filter_count) { - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { - i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev); - add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY, - is_vf, is_netdev); - if (!add_f) { - dev_info(&vsi->back->pdev->dev, - "Could not add filter %d for %pM\n", - I40E_VLAN_ANY, f->macaddr); - spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; - } - } - } - - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* schedule our worker thread which will take care of * applying the new filter changes */ i40e_service_event_schedule(vsi->back); - return 0; } /** @@ -3322,6 +3329,33 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data) } /** + * i40e_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + **/ +static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct i40e_q_vector *q_vector = + container_of(notify, struct i40e_q_vector, affinity_notify); + + q_vector->affinity_mask = *mask; +} + +/** + * i40e_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. + **/ +static void i40e_irq_affinity_release(struct kref *ref) {} + +/** * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts * @vsi: the VSI being configured * @basename: name for the vector @@ -3336,10 +3370,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; + int irq_num; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; + irq_num = pf->msix_entries[base + vector].vector; + if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); @@ -3354,7 +3391,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = request_irq(pf->msix_entries[base + vector].vector, + err = request_irq(irq_num, vsi->irq_handler, 0, q_vector->name, @@ -3364,9 +3401,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) "MSIX request_irq failed, error: %d\n", err); goto free_queue_irqs; } + + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = i40e_irq_affinity_notify; + q_vector->affinity_notify.release = i40e_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* assign the mask for this irq */ - irq_set_affinity_hint(pf->msix_entries[base + vector].vector, - &q_vector->affinity_mask); + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } vsi->irqs_ready = true; @@ -3375,10 +3416,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) free_queue_irqs: while (vector) { vector--; - irq_set_affinity_hint(pf->msix_entries[base + vector].vector, - NULL); - free_irq(pf->msix_entries[base + vector].vector, - &(vsi->q_vectors[vector])); + irq_num = pf->msix_entries[base + vector].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &vsi->q_vectors[vector]); } return err; } @@ -3973,30 +4014,36 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) } /** - * i40e_vsi_control_rings - Start or stop a VSI's rings + * i40e_vsi_start_rings - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool request) +int i40e_vsi_start_rings(struct i40e_vsi *vsi) { int ret = 0; /* do rx first for enable and last for disable */ - if (request) { - ret = i40e_vsi_control_rx(vsi, request); - if (ret) - return ret; - ret = i40e_vsi_control_tx(vsi, request); - } else { - /* Ignore return value, we need to shutdown whatever we can */ - i40e_vsi_control_tx(vsi, request); - i40e_vsi_control_rx(vsi, request); - } + ret = i40e_vsi_control_rx(vsi, true); + if (ret) + return ret; + ret = i40e_vsi_control_tx(vsi, true); return ret; } /** + * i40e_vsi_stop_rings - Stop a VSI's rings + * @vsi: the VSI being configured + **/ +void i40e_vsi_stop_rings(struct i40e_vsi *vsi) +{ + /* do rx first for enable and last for disable + * Ignore return value, we need to shutdown whatever we can + */ + i40e_vsi_control_tx(vsi, false); + i40e_vsi_control_rx(vsi, false); +} + +/** * i40e_vsi_free_irq - Free the irq association with the OS * @vsi: the VSI being configured **/ @@ -4017,19 +4064,23 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) vsi->irqs_ready = false; for (i = 0; i < vsi->num_q_vectors; i++) { - u16 vector = i + base; + int irq_num; + u16 vector; + + vector = i + base; + irq_num = pf->msix_entries[vector].vector; /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || !vsi->q_vectors[i]->num_ringpairs) continue; + /* clear the affinity notifier in the IRQ descriptor */ + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(pf->msix_entries[vector].vector, - NULL); - synchronize_irq(pf->msix_entries[vector].vector); - free_irq(pf->msix_entries[vector].vector, - vsi->q_vectors[i]); + irq_set_affinity_hint(irq_num, NULL); + synchronize_irq(irq_num); + free_irq(irq_num, vsi->q_vectors[i]); /* Tear down the interrupt queue link list * @@ -5190,7 +5241,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) i40e_configure_msi_and_legacy(vsi); /* start rings */ - err = i40e_vsi_control_rings(vsi, true); + err = i40e_vsi_start_rings(vsi); if (err) return err; @@ -5287,7 +5338,7 @@ void i40e_down(struct i40e_vsi *vsi) netif_tx_disable(vsi->netdev); } i40e_vsi_disable_irq(vsi); - i40e_vsi_control_rings(vsi, false); + i40e_vsi_stop_rings(vsi); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { @@ -6670,7 +6721,6 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi); static void i40e_fdir_sb_setup(struct i40e_pf *pf) { struct i40e_vsi *vsi; - int i; /* quick workaround for an NVM issue that leaves a critical register * uninitialized @@ -6681,6 +6731,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) 0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb, 0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21, 0x95b3a76d}; + int i; for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++) wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]); @@ -6690,13 +6741,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) return; /* find existing VSI and see if it needs configuring */ - vsi = NULL; - for (i = 0; i < pf->num_alloc_vsi; i++) { - if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { - vsi = pf->vsi[i]; - break; - } - } + vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); /* create a new VSI if none exists */ if (!vsi) { @@ -6718,15 +6763,12 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) **/ static void i40e_fdir_teardown(struct i40e_pf *pf) { - int i; + struct i40e_vsi *vsi; i40e_fdir_filter_exit(pf); - for (i = 0; i < pf->num_alloc_vsi; i++) { - if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { - i40e_vsi_release(pf->vsi[i]); - break; - } - } + vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); + if (vsi) + i40e_vsi_release(vsi); } /** @@ -7354,7 +7396,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) pf->rss_table_size : 64; vsi->netdev_registered = false; vsi->work_limit = I40E_DEFAULT_IRQ_WORK; - INIT_LIST_HEAD(&vsi->mac_filter_list); + hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; ret = i40e_set_num_rings_in_vsi(vsi); @@ -7369,7 +7411,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings); /* Initialize VSI lock */ - spin_lock_init(&vsi->mac_filter_list_lock); + spin_lock_init(&vsi->mac_filter_hash_lock); pf->vsi[vsi_idx] = vsi; ret = vsi_idx; goto unlock_pf; @@ -8345,8 +8387,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) i40e_pf_config_rss(pf); } - dev_info(&pf->pdev->dev, "RSS count/HW max RSS count: %d/%d\n", - pf->alloc_rss_size, pf->rss_size_max); + dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count: %d/%d\n", + vsi->req_queue_pairs, pf->rss_size_max); return pf->alloc_rss_size; } @@ -8489,15 +8531,6 @@ static int i40e_sw_init(struct i40e_pf *pf) int err = 0; int size; - pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE, - (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)); - if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) { - if (I40E_DEBUG_USER & debug) - pf->hw.debug_mask = debug; - pf->msg_enable = netif_msg_init((debug & ~I40E_DEBUG_USER), - I40E_DEFAULT_MSG_ENABLE); - } - /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | I40E_FLAG_MSI_ENABLED | @@ -9163,24 +9196,18 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); - /* The following steps are necessary to prevent reception - * of tagged packets - some older NVM configurations load a - * default a MAC-VLAN filter that accepts any tagged packet - * which must be replaced by a normal filter. - */ - i40e_rm_default_mac_filter(vsi, mac_addr); - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, true); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); + i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY); + spin_unlock_bh(&vsi->mac_filter_hash_lock); } else { /* relate the VSI_VMDQ name to the VSI_MAIN name */ snprintf(netdev->name, IFNAMSIZ, "%sv%%d", pf->vsi[pf->lan_vsi]->netdev->name); random_ether_addr(mac_addr); - spin_lock_bh(&vsi->mac_filter_list_lock); - i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); + i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY); + spin_unlock_bh(&vsi->mac_filter_hash_lock); } ether_addr_copy(netdev->dev_addr, mac_addr); @@ -9198,6 +9225,11 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) i40e_fcoe_config_netdev(netdev, vsi); #endif + /* MTU range: 68 - 9706 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = I40E_MAX_RXBUFFER - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + return 0; } @@ -9264,7 +9296,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; - struct i40e_mac_filter *f, *ftmp; + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt; u8 enabled_tc = 0x1; /* TC0 enabled */ int f_count = 0; @@ -9463,13 +9497,13 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) vsi->active_filters = 0; clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* If macvlan filters already exist, force them to get loaded */ - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { f->state = I40E_FILTER_NEW; f_count++; } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); if (f_count) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; @@ -9499,11 +9533,12 @@ err: **/ int i40e_vsi_release(struct i40e_vsi *vsi) { - struct i40e_mac_filter *f, *ftmp; + struct i40e_mac_filter *f; + struct hlist_node *h; struct i40e_veb *veb = NULL; struct i40e_pf *pf; u16 uplink_seid; - int i, n; + int i, n, bkt; pf = vsi->back; @@ -9533,11 +9568,19 @@ int i40e_vsi_release(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); } - spin_lock_bh(&vsi->mac_filter_list_lock); - list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) - i40e_del_filter(vsi, f->macaddr, f->vlan, - f->is_vf, f->is_netdev); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); + + /* clear the sync flag on all filters */ + if (vsi->netdev) { + __dev_uc_unsync(vsi->netdev, NULL); + __dev_mc_unsync(vsi->netdev, NULL); + } + + /* make sure any remaining filters are marked for deletion */ + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) + __i40e_del_filter(vsi, f); + + spin_unlock_bh(&vsi->mac_filter_hash_lock); i40e_sync_vsi_filters(vsi); @@ -9681,8 +9724,6 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0; pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid; i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc); - if (vsi->type == I40E_VSI_MAIN) - i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr); /* assign it some queues */ ret = i40e_alloc_rings(vsi); @@ -10806,10 +10847,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); - if (debug != -1) { - pf->msg_enable = pf->hw.debug_mask; - pf->msg_enable = debug; - } + pf->msg_enable = netif_msg_init(debug, + NETIF_MSG_DRV | + NETIF_MSG_PROBE | + NETIF_MSG_LINK); + if (debug < -1) + pf->hw.debug_mask = debug; /* do a special CORER for clearing PXE mode once at init */ if (hw->revision_id == 0 && @@ -10951,7 +10994,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE & I40E_FLAG_DCB_ENABLED); + pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 954efe3118db..38ee18f11124 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -722,9 +722,20 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; } + /* Clear error status on read */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + return 0; } + /* Clear status even it is not read and log */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { + i40e_debug(hw, I40E_DEBUG_NVM, + "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT: status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); @@ -1074,6 +1085,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) } hw->nvm_wait_opcode = 0; + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT_WAIT: hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index f1feceab758a..5e2272c9e717 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -159,16 +159,15 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); struct timespec64 now, then; - unsigned long flags; then = ns_to_timespec64(delta); - spin_lock_irqsave(&pf->tmreg_lock, flags); + mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now); now = timespec64_add(now, then); i40e_ptp_write(pf, (const struct timespec64 *)&now); - spin_unlock_irqrestore(&pf->tmreg_lock, flags); + mutex_unlock(&pf->tmreg_lock); return 0; } @@ -184,11 +183,10 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int i40e_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - unsigned long flags; - spin_lock_irqsave(&pf->tmreg_lock, flags); + mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, ts); - spin_unlock_irqrestore(&pf->tmreg_lock, flags); + mutex_unlock(&pf->tmreg_lock); return 0; } @@ -205,11 +203,10 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - unsigned long flags; - spin_lock_irqsave(&pf->tmreg_lock, flags); + mutex_lock(&pf->tmreg_lock); i40e_ptp_write(pf, ts); - spin_unlock_irqrestore(&pf->tmreg_lock, flags); + mutex_unlock(&pf->tmreg_lock); return 0; } @@ -230,6 +227,47 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp, } /** + * i40e_ptp_update_latch_events - Read I40E_PRTTSYN_STAT_1 and latch events + * @pf: the PF data structure + * + * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers + * for noticed latch events. This allows the driver to keep track of the first + * time a latch event was noticed which will be used to help clear out Rx + * timestamps for packets that got dropped or lost. + * + * This function will return the current value of I40E_PRTTSYN_STAT_1 and is + * expected to be called only while under the ptp_rx_lock. + **/ +static u32 i40e_ptp_get_rx_events(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + u32 prttsyn_stat, new_latch_events; + int i; + + prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1); + new_latch_events = prttsyn_stat & ~pf->latch_event_flags; + + /* Update the jiffies time for any newly latched timestamp. This + * ensures that we store the time that we first discovered a timestamp + * was latched by the hardware. The service task will later determine + * if we should free the latch and drop that timestamp should too much + * time pass. This flow ensures that we only update jiffies for new + * events latched since the last time we checked, and not all events + * currently latched, so that the service task accounting remains + * accurate. + */ + for (i = 0; i < 4; i++) { + if (new_latch_events & BIT(i)) + pf->latch_events[i] = jiffies; + } + + /* Finally, we store the current status of the Rx timestamp latches */ + pf->latch_event_flags = prttsyn_stat; + + return prttsyn_stat; +} + +/** * i40e_ptp_rx_hang - Detect error case when Rx timestamp registers are hung * @vsi: The VSI with the rings relevant to 1588 * @@ -242,10 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - struct i40e_ring *rx_ring; - unsigned long rx_event; - u32 prttsyn_stat; - int n; + int i; /* Since we cannot turn off the Rx timestamp logic if the device is * configured for Tx timestamping, we check if Rx timestamping is @@ -255,42 +290,30 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx) return; - prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1); + spin_lock_bh(&pf->ptp_rx_lock); - /* Unless all four receive timestamp registers are latched, we are not - * concerned about a possible PTP Rx hang, so just update the timeout - * counter and exit. - */ - if (!(prttsyn_stat & ((I40E_PRTTSYN_STAT_1_RXT0_MASK << - I40E_PRTTSYN_STAT_1_RXT0_SHIFT) | - (I40E_PRTTSYN_STAT_1_RXT1_MASK << - I40E_PRTTSYN_STAT_1_RXT1_SHIFT) | - (I40E_PRTTSYN_STAT_1_RXT2_MASK << - I40E_PRTTSYN_STAT_1_RXT2_SHIFT) | - (I40E_PRTTSYN_STAT_1_RXT3_MASK << - I40E_PRTTSYN_STAT_1_RXT3_SHIFT)))) { - pf->last_rx_ptp_check = jiffies; - return; - } + /* Update current latch times for Rx events */ + i40e_ptp_get_rx_events(pf); - /* Determine the most recent watchdog or rx_timestamp event. */ - rx_event = pf->last_rx_ptp_check; - for (n = 0; n < vsi->num_queue_pairs; n++) { - rx_ring = vsi->rx_rings[n]; - if (time_after(rx_ring->last_rx_timestamp, rx_event)) - rx_event = rx_ring->last_rx_timestamp; + /* Check all the currently latched Rx events and see whether they have + * been latched for over a second. It is assumed that any timestamp + * should have been cleared within this time, or else it was captured + * for a dropped frame that the driver never received. Thus, we will + * clear any timestamp that has been latched for over 1 second. + */ + for (i = 0; i < 4; i++) { + if ((pf->latch_event_flags & BIT(i)) && + time_is_before_jiffies(pf->latch_events[i] + HZ)) { + rd32(hw, I40E_PRTTSYN_RXTIME_H(i)); + pf->latch_event_flags &= ~BIT(i); + pf->rx_hwtstamp_cleared++; + dev_warn(&pf->pdev->dev, + "Clearing a missed Rx timestamp event for RXTIME[%d]\n", + i); + } } - /* Only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5 * HZ)) { - rd32(hw, I40E_PRTTSYN_RXTIME_H(0)); - rd32(hw, I40E_PRTTSYN_RXTIME_H(1)); - rd32(hw, I40E_PRTTSYN_RXTIME_H(2)); - rd32(hw, I40E_PRTTSYN_RXTIME_H(3)); - pf->last_rx_ptp_check = jiffies; - pf->rx_hwtstamp_cleared++; - WARN_ONCE(1, "Detected Rx timestamp register hang\n"); - } + spin_unlock_bh(&pf->ptp_rx_lock); } /** @@ -353,14 +376,25 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index) hw = &pf->hw; - prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1); + spin_lock_bh(&pf->ptp_rx_lock); - if (!(prttsyn_stat & BIT(index))) + /* Get current Rx events and update latch times */ + prttsyn_stat = i40e_ptp_get_rx_events(pf); + + /* TODO: Should we warn about missing Rx timestamp event? */ + if (!(prttsyn_stat & BIT(index))) { + spin_unlock_bh(&pf->ptp_rx_lock); return; + } + + /* Clear the latched event since we're about to read its register */ + pf->latch_event_flags &= ~BIT(index); lo = rd32(hw, I40E_PRTTSYN_RXTIME_L(index)); hi = rd32(hw, I40E_PRTTSYN_RXTIME_H(index)); + spin_unlock_bh(&pf->ptp_rx_lock); + ns = (((u64)hi) << 32) | lo; i40e_ptp_convert_to_hwtstamp(skb_hwtstamps(skb), ns); @@ -514,12 +548,15 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, } /* Clear out all 1588-related registers to clear and unlatch them. */ + spin_lock_bh(&pf->ptp_rx_lock); rd32(hw, I40E_PRTTSYN_STAT_0); rd32(hw, I40E_PRTTSYN_TXTIME_H); rd32(hw, I40E_PRTTSYN_RXTIME_H(0)); rd32(hw, I40E_PRTTSYN_RXTIME_H(1)); rd32(hw, I40E_PRTTSYN_RXTIME_H(2)); rd32(hw, I40E_PRTTSYN_RXTIME_H(3)); + pf->latch_event_flags = 0; + spin_unlock_bh(&pf->ptp_rx_lock); /* Enable/disable the Tx timestamp interrupt based on user input. */ regval = rd32(hw, I40E_PRTTSYN_CTL0); @@ -658,10 +695,8 @@ void i40e_ptp_init(struct i40e_pf *pf) return; } - /* we have to initialize the lock first, since we can't control - * when the user will enter the PHC device entry points - */ - spin_lock_init(&pf->tmreg_lock); + mutex_init(&pf->tmreg_lock); + spin_lock_init(&pf->ptp_rx_lock); /* ensure we have a clock device */ err = i40e_ptp_create_clock(pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6287bf63c43c..5544b509832f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -122,14 +122,10 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, struct device *dev; dma_addr_t dma; u32 td_cmd = 0; - u16 delay = 0; u16 i; /* find existing FDIR VSI */ - vsi = NULL; - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) - vsi = pf->vsi[i]; + vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (!vsi) return -ENOENT; @@ -137,15 +133,11 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, dev = tx_ring->dev; /* we need two descriptors to add/del a filter and we can wait */ - do { - if (I40E_DESC_UNUSED(tx_ring) > 1) - break; + for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) { + if (!i) + return -EAGAIN; msleep_interruptible(1); - delay++; - } while (delay < I40E_FD_CLEAN_DELAY); - - if (!(I40E_DESC_UNUSED(tx_ring) > 1)) - return -EAGAIN; + } dma = dma_map_single(dev, raw_packet, I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE); @@ -335,22 +327,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, return err ? -EOPNOTSUPP : 0; } -/** - * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for - * a specific flow spec - * @vsi: pointer to the targeted VSI - * @fd_data: the flow director data required for the FDir descriptor - * @add: true adds a filter, false removes it - * - * Returns 0 if the filters were successfully added or removed - **/ -static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) -{ - return -EOPNOTSUPP; -} - #define I40E_IP_DUMMY_PACKET_LEN 34 /** * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for @@ -433,12 +409,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, case UDP_V4_FLOW: ret = i40e_add_del_fdir_udpv4(vsi, input, add); break; - case SCTP_V4_FLOW: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); - break; - case IPV4_FLOW: - ret = i40e_add_del_fdir_ipv4(vsi, input, add); - break; case IP_USER_FLOW: switch (input->ip4_proto) { case IPPROTO_TCP: @@ -447,15 +417,16 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, case IPPROTO_UDP: ret = i40e_add_del_fdir_udpv4(vsi, input, add); break; - case IPPROTO_SCTP: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); - break; - default: + case IPPROTO_IP: ret = i40e_add_del_fdir_ipv4(vsi, input, add); break; + default: + /* We cannot support masking based on protocol */ + goto unsupported_flow; } break; default: +unsupported_flow: dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", input->flow_type); ret = -EINVAL; @@ -645,7 +616,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) return 0; } -#define WB_STRIDE 0x3 +#define WB_STRIDE 4 /** * i40e_clean_tx_irq - Reclaim resources after transmit completes @@ -761,7 +732,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, unsigned int j = i40e_get_tx_pending(tx_ring, false); if (budget && - ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && + ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__I40E_DOWN, &vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; @@ -1246,7 +1217,6 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) * because each write-back erases this info. */ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); - rx_desc->read.hdr_addr = 0; rx_desc++; bi++; @@ -1437,13 +1407,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> + u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK; + u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT; - if (unlikely(rsyn)) { - i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn); - rx_ring->last_rx_timestamp = jiffies; - } + if (unlikely(tsynvalid)) + i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn); i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype); @@ -1767,7 +1736,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; struct sk_buff *skb; - u32 rx_status; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1781,21 +1749,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; - - if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) - break; - /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr * which is always zero because packet split isn't used, if the * hardware wrote DD then it will be non-zero */ - if (!rx_desc->wb.qword1.status_error_len) + if (!i40e_test_staterr(rx_desc, + BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) break; /* This memory barrier is needed to keep us from reading @@ -1829,6 +1789,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; + /* populate checksum, VLAN, and protocol */ i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); @@ -2025,12 +1989,25 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { + const cpumask_t *aff_mask = &q_vector->affinity_mask; + int cpu_id = smp_processor_id(); + + /* It is possible that the interrupt affinity has changed but, + * if the cpu is pegged at 100%, polling will never exit while + * traffic continues and the interrupt will be stuck on this + * cpu. We check to make sure affinity is correct before we + * continue to poll, otherwise we must stop polling so the + * interrupt can move to the correct cpu. + */ + if (likely(cpumask_test_cpu(cpu_id, aff_mask) || + !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) { tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_enable_wb_on_itr(vsi, q_vector); + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; + i40e_enable_wb_on_itr(vsi, q_vector); + } + return budget; } - return budget; } if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) @@ -2038,11 +2015,18 @@ tx_only: /* Work is done so exit the polling mode and re-enable the interrupt */ napi_complete_done(napi, work_done); - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { - i40e_update_enable_itr(vsi, q_vector); - } else { /* Legacy mode */ + + /* If we're prematurely stopping polling to fix the interrupt + * affinity we want to make sure polling starts back up so we + * issue a call to i40e_force_wb which triggers a SW interrupt. + */ + if (!clean_complete) + i40e_force_wb(vsi, q_vector); + else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) i40e_irq_dynamic_enable_icr0(vsi->back, false); - } + else + i40e_update_enable_itr(vsi, q_vector); + return 0; } @@ -2716,9 +2700,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; - u16 desc_count = 0; - bool tail_bump = true; - bool do_rs = false; + u16 desc_count = 1; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2801,8 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i++; if (i == tx_ring->count) @@ -2810,66 +2791,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + /* write last descriptor with EOP bit */ + td_cmd |= I40E_TX_DESC_CMD_EOP; + + /* We can OR these values together as they both are checked against + * 4 below and at this point desc_count will be used as a boolean value + * after this if/else block. + */ + desc_count |= ++tx_ring->packet_stride; + /* Algorithm to optimize tail and RS bit setting: - * if xmit_more is supported - * if xmit_more is true - * do not update tail and do not mark RS bit. - * if xmit_more is false and last xmit_more was false - * if every packet spanned less than 4 desc - * then set RS bit on 4th packet and update tail - * on every packet - * else - * update tail and set RS bit on every packet. - * if xmit_more is false and last_xmit_more was true - * update tail and set RS bit. + * if queue is stopped + * mark RS bit + * reset packet counter + * else if xmit_more is supported and is true + * advance packet counter to 4 + * reset desc_count to 0 * - * Optimization: wmb to be issued only in case of tail update. - * Also optimize the Descriptor WB path for RS bit with the same - * algorithm. + * if desc_count >= 4 + * mark RS bit + * reset packet counter + * if desc_count > 0 + * update tail * - * Note: If there are less than 4 packets + * Note: If there are less than 4 descriptors * pending and interrupts were disabled the service task will * trigger a force WB. */ - if (skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring))) { - tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - tail_bump = false; - } else if (!skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring)) && - (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && - (tx_ring->packet_stride < WB_STRIDE) && - (desc_count < WB_STRIDE)) { - tx_ring->packet_stride++; - } else { + if (netif_xmit_stopped(txring_txq(tx_ring))) { + goto do_rs; + } else if (skb->xmit_more) { + /* set stride to arm on next packet and reset desc_count */ + tx_ring->packet_stride = WB_STRIDE; + desc_count = 0; + } else if (desc_count >= WB_STRIDE) { +do_rs: + /* write last descriptor with RS bit set */ + td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; - tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - do_rs = true; } - if (do_rs) - tx_ring->packet_stride = 0; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : - I40E_TX_DESC_CMD_EOP) << - I40E_TXD_QW1_CMD_SHIFT); + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; /* notify HW of packet */ - if (!tail_bump) { - prefetchw(tx_desc + 1); - } else { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); + if (desc_count) { writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); } + return; dma_error: diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 508840585645..de8550f4e3a4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -307,15 +307,12 @@ struct i40e_ring { u8 atr_sample_rate; u8 atr_count; - unsigned long last_rx_timestamp; - bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ u8 packet_stride; u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) /* stats structs */ struct i40e_queue_stats stats; diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index bd5f13bef83c..d9a266041bf1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -366,6 +366,7 @@ enum i40e_nvmupd_state { I40E_NVMUPD_STATE_WRITING, I40E_NVMUPD_STATE_INIT_WAIT, I40E_NVMUPD_STATE_WRITE_WAIT, + I40E_NVMUPD_STATE_ERROR }; /* nvm_access definition and its masks/shifts need to be accessible to diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index f861d3109d1a..974ba2baf6ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -165,6 +165,10 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 +#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ + I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) + struct i40e_virtchnl_vf_resource { u16 num_vsis; u16 num_queue_pairs; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 54b8ee2583f1..53b46553dd8d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -686,17 +686,17 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) if (vf->port_vlan_id) i40e_vsi_add_pvid(vsi, vf->port_vlan_id); - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); if (is_valid_ether_addr(vf->default_lan_addr.addr)) { f = i40e_add_filter(vsi, vf->default_lan_addr.addr, - vf->port_vlan_id ? vf->port_vlan_id : -1, - true, false); + vf->port_vlan_id ? + vf->port_vlan_id : -1); if (!f) dev_info(&pf->pdev->dev, "Could not add MAC filter %pM for VF %d\n", vf->default_lan_addr.addr, vf->vf_id); } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena); i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), @@ -811,6 +811,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf) i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]); vf->lan_vsi_idx = 0; vf->lan_vsi_id = 0; + vf->num_mac = 0; } msix_vf = pf->hw.func_caps.num_msix_vectors_vf; @@ -990,7 +991,7 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) if (vf->lan_vsi_idx == 0) goto complete_reset; - i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false); + i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]); complete_reset: /* reallocate VF resources to reset the VSI state */ i40e_free_vf_res(vf); @@ -1031,8 +1032,7 @@ void i40e_free_vfs(struct i40e_pf *pf) i40e_notify_client_of_vf_enable(pf, 0); for (i = 0; i < pf->num_alloc_vfs; i++) if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) - i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx], - false); + i40e_vsi_stop_rings(pf->vsi[pf->vf[i].lan_vsi_idx]); /* Disable IOV before freeing resources. This lets any VF drivers * running in the host get themselves cleaned up before we yank @@ -1449,9 +1449,9 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; - int num_vlans = 0; + int num_vlans = 0, bkt; - list_for_each_entry(f, &vsi->mac_filter_list, list) { + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) num_vlans++; } @@ -1481,6 +1481,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, struct i40e_vsi *vsi; bool alluni = false; int aq_err = 0; + int bkt; vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || @@ -1507,7 +1508,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vf->port_vlan_id, NULL); } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { - list_for_each_entry(f, &vsi->mac_filter_list, list) { + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) continue; aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, @@ -1557,7 +1558,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vf->port_vlan_id, NULL); } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { - list_for_each_entry(f, &vsi->mac_filter_list, list) { + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { aq_ret = 0; if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) { aq_ret = @@ -1757,7 +1758,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], true)) + if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) aq_ret = I40E_ERR_TIMEOUT; error_param: /* send the response to the VF */ @@ -1796,8 +1797,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false)) - aq_ret = I40E_ERR_TIMEOUT; + i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]); error_param: /* send the response to the VF */ @@ -1927,20 +1927,18 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) /* Lock once, because all function inside for loop accesses VSI's * MAC filter list which needs to be protected using same lock. */ - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* add new addresses to the list */ for (i = 0; i < al->num_elements; i++) { struct i40e_mac_filter *f; - f = i40e_find_mac(vsi, al->list[i].addr, true, false); + f = i40e_find_mac(vsi, al->list[i].addr); if (!f) { if (i40e_is_vsi_in_vlan(vsi)) - f = i40e_put_mac_in_vlan(vsi, al->list[i].addr, - true, false); + f = i40e_put_mac_in_vlan(vsi, al->list[i].addr); else - f = i40e_add_filter(vsi, al->list[i].addr, -1, - true, false); + f = i40e_add_filter(vsi, al->list[i].addr, -1); } if (!f) { @@ -1948,13 +1946,13 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) "Unable to add MAC filter %pM for VF %d\n", al->list[i].addr, vf->vf_id); ret = I40E_ERR_PARAM; - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } else { vf->num_mac++; } } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* program the updated filter list */ ret = i40e_sync_vsi_filters(vsi); @@ -2003,18 +2001,18 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) } vsi = pf->vsi[vf->lan_vsi_idx]; - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ for (i = 0; i < al->num_elements; i++) - if (i40e_del_mac_all_vlan(vsi, al->list[i].addr, true, false)) { + if (i40e_del_mac_all_vlan(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } else { vf->num_mac--; } - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); /* program the updated filter list */ ret = i40e_sync_vsi_filters(vsi); @@ -2139,9 +2137,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) } for (i = 0; i < vfl->num_elements; i++) { - int ret = i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]); - if (!ret) - vf->num_vlan--; + i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]); + vf->num_vlan--; if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states)) i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid, @@ -2153,11 +2150,6 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) false, vfl->vlan_id[i], NULL); - - if (ret) - dev_err(&pf->pdev->dev, - "Unable to delete VLAN filter %d for VF %d, error %d\n", - vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -2689,6 +2681,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct i40e_mac_filter *f; struct i40e_vf *vf; int ret = 0; + int bkt; /* validate the request */ if (vf_id >= pf->num_alloc_vfs) { @@ -2715,23 +2708,22 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } /* Lock once because below invoked function add/del_filter requires - * mac_filter_list_lock to be held + * mac_filter_hash_lock to be held */ - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete the temporary mac address */ if (!is_zero_ether_addr(vf->default_lan_addr.addr)) i40e_del_filter(vsi, vf->default_lan_addr.addr, - vf->port_vlan_id ? vf->port_vlan_id : -1, - true, false); + vf->port_vlan_id ? vf->port_vlan_id : -1); /* Delete all the filters for this VSI - we're going to kill it * anyway. */ - list_for_each_entry(f, &vsi->mac_filter_list, list) - i40e_del_filter(vsi, f->macaddr, f->vlan, true, false); + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) + i40e_del_filter(vsi, f->macaddr, f->vlan); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ @@ -2803,9 +2795,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - spin_lock_bh(&vsi->mac_filter_list_lock); + spin_lock_bh(&vsi->mac_filter_hash_lock); is_vsi_in_vlan = i40e_is_vsi_in_vlan(vsi); - spin_unlock_bh(&vsi->mac_filter_list_lock); + spin_unlock_bh(&vsi->mac_filter_hash_lock); if (le16_to_cpu(vsi->info.pvid) == 0 && is_vsi_in_vlan) { dev_err(&pf->pdev->dev, @@ -2835,13 +2827,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, if (vsi->info.pvid) { /* kill old VLAN */ - ret = i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) & - VLAN_VID_MASK)); - if (ret) { - dev_info(&vsi->back->pdev->dev, - "remove VLAN failed, ret=%d, aq_err=%d\n", - ret, pf->hw.aq.asq_last_status); - } + i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) & + VLAN_VID_MASK)); } if (vlan_id || qos) ret = i40e_vsi_add_pvid(vsi, vlanprio); @@ -2940,7 +2927,7 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, } if (max_tx_rate > speed) { - dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.", + dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n", max_tx_rate, vf->vf_id); ret = -EINVAL; goto error; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 44f7ed7583dd..96385156b824 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -912,11 +912,11 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc); desc_idx = ntc; + hw->aq.arq_last_status = + (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & I40E_AQ_FLAG_ERR) { ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.arq_last_status = - (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 75f2a2cdd738..c4b174afd253 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -150,7 +150,7 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) return 0; } -#define WB_STRIDE 0x3 +#define WB_STRIDE 4 /** * i40e_clean_tx_irq - Reclaim resources after transmit completes @@ -266,7 +266,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, unsigned int j = i40evf_get_tx_pending(tx_ring, false); if (budget && - ((j / (WB_STRIDE + 1)) == 0) && (j > 0) && + ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__I40E_DOWN, &vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; @@ -705,7 +705,6 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) * because each write-back erases this info. */ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); - rx_desc->read.hdr_addr = 0; rx_desc++; bi++; @@ -1209,7 +1208,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; struct sk_buff *skb; - u32 rx_status; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1223,21 +1221,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; - - if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) - break; - /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr * which is always zero because packet split isn't used, if the * hardware wrote DD then it will be non-zero */ - if (!rx_desc->wb.qword1.status_error_len) + if (!i40e_test_staterr(rx_desc, + BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) break; /* This memory barrier is needed to keep us from reading @@ -1271,6 +1261,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; + /* populate checksum, VLAN, and protocol */ i40evf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); @@ -1461,12 +1455,24 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { + const cpumask_t *aff_mask = &q_vector->affinity_mask; + int cpu_id = smp_processor_id(); + + /* It is possible that the interrupt affinity has changed but, + * if the cpu is pegged at 100%, polling will never exit while + * traffic continues and the interrupt will be stuck on this + * cpu. We check to make sure affinity is correct before we + * continue to poll, otherwise we must stop polling so the + * interrupt can move to the correct cpu. + */ + if (likely(cpumask_test_cpu(cpu_id, aff_mask))) { tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_enable_wb_on_itr(vsi, q_vector); + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; + i40e_enable_wb_on_itr(vsi, q_vector); + } + return budget; } - return budget; } if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) @@ -1474,7 +1480,16 @@ tx_only: /* Work is done so exit the polling mode and re-enable the interrupt */ napi_complete_done(napi, work_done); - i40e_update_enable_itr(vsi, q_vector); + + /* If we're prematurely stopping polling to fix the interrupt + * affinity we want to make sure polling starts back up so we + * issue a call to i40evf_force_wb which triggers a SW interrupt. + */ + if (!clean_complete) + i40evf_force_wb(vsi, q_vector); + else + i40e_update_enable_itr(vsi, q_vector); + return 0; } @@ -1935,9 +1950,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; - u16 desc_count = 0; - bool tail_bump = true; - bool do_rs = false; + u16 desc_count = 1; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2020,8 +2033,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i++; if (i == tx_ring->count) @@ -2029,66 +2041,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + /* write last descriptor with EOP bit */ + td_cmd |= I40E_TX_DESC_CMD_EOP; + + /* We can OR these values together as they both are checked against + * 4 below and at this point desc_count will be used as a boolean value + * after this if/else block. + */ + desc_count |= ++tx_ring->packet_stride; + /* Algorithm to optimize tail and RS bit setting: - * if xmit_more is supported - * if xmit_more is true - * do not update tail and do not mark RS bit. - * if xmit_more is false and last xmit_more was false - * if every packet spanned less than 4 desc - * then set RS bit on 4th packet and update tail - * on every packet - * else - * update tail and set RS bit on every packet. - * if xmit_more is false and last_xmit_more was true - * update tail and set RS bit. + * if queue is stopped + * mark RS bit + * reset packet counter + * else if xmit_more is supported and is true + * advance packet counter to 4 + * reset desc_count to 0 * - * Optimization: wmb to be issued only in case of tail update. - * Also optimize the Descriptor WB path for RS bit with the same - * algorithm. + * if desc_count >= 4 + * mark RS bit + * reset packet counter + * if desc_count > 0 + * update tail * - * Note: If there are less than 4 packets + * Note: If there are less than 4 descriptors * pending and interrupts were disabled the service task will * trigger a force WB. */ - if (skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring))) { - tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - tail_bump = false; - } else if (!skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring)) && - (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && - (tx_ring->packet_stride < WB_STRIDE) && - (desc_count < WB_STRIDE)) { - tx_ring->packet_stride++; - } else { + if (netif_xmit_stopped(txring_txq(tx_ring))) { + goto do_rs; + } else if (skb->xmit_more) { + /* set stride to arm on next packet and reset desc_count */ + tx_ring->packet_stride = WB_STRIDE; + desc_count = 0; + } else if (desc_count >= WB_STRIDE) { +do_rs: + /* write last descriptor with RS bit set */ + td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; - tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - do_rs = true; } - if (do_rs) - tx_ring->packet_stride = 0; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : - I40E_TX_DESC_CMD_EOP) << - I40E_TXD_QW1_CMD_SHIFT); + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; /* notify HW of packet */ - if (!tail_bump) { - prefetchw(tx_desc + 1); - } else { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); + if (desc_count) { writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); } + return; dma_error: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index abcdecabbc56..a586e19cfd1d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -309,7 +309,6 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ u8 packet_stride; -#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 97f96e0d9c4c..ca7afe59c55f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -348,6 +348,7 @@ enum i40e_nvmupd_state { I40E_NVMUPD_STATE_WRITING, I40E_NVMUPD_STATE_INIT_WAIT, I40E_NVMUPD_STATE_WRITE_WAIT, + I40E_NVMUPD_STATE_ERROR }; /* nvm_access definition and its masks/shifts need to be accessible to diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index bd691ad86673..fc374f833aa9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -162,6 +162,10 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 +#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ + I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) + struct i40e_virtchnl_vf_resource { u16 num_vsis; u16 num_queue_pairs; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index c5fd724313c7..fffe4cf2c20b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -107,7 +107,8 @@ struct i40e_q_vector { int v_idx; /* vector index in list */ char name[IFNAMSIZ + 9]; bool arm_wb_state; - cpumask_var_t affinity_mask; + cpumask_t affinity_mask; + struct irq_affinity_notify affinity_notify; }; /* Helper macros to switch between ints/sec and what the register uses. diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 14372810fc27..db36744c6691 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 16 +#define DRV_VERSION_BUILD 21 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -496,6 +496,33 @@ static void i40evf_netpoll(struct net_device *netdev) #endif /** + * i40evf_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + **/ +static void i40evf_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct i40e_q_vector *q_vector = + container_of(notify, struct i40e_q_vector, affinity_notify); + + q_vector->affinity_mask = *mask; +} + +/** + * i40evf_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. + **/ +static void i40evf_irq_affinity_release(struct kref *ref) {} + +/** * i40evf_request_traffic_irqs - Initialize MSI-X interrupts * @adapter: board private structure * @@ -507,6 +534,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) { int vector, err, q_vectors; int rx_int_idx = 0, tx_int_idx = 0; + int irq_num; i40evf_irq_disable(adapter); /* Decrement for Other and TCP Timer vectors */ @@ -514,6 +542,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -532,21 +561,23 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) /* skip this unused q_vector */ continue; } - err = request_irq( - adapter->msix_entries[vector + NONQ_VECS].vector, - i40evf_msix_clean_rings, - 0, - q_vector->name, - q_vector); + err = request_irq(irq_num, + i40evf_msix_clean_rings, + 0, + q_vector->name, + q_vector); if (err) { dev_info(&adapter->pdev->dev, "Request_irq failed, error: %d\n", err); goto free_queue_irqs; } + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = i40evf_irq_affinity_notify; + q_vector->affinity_notify.release = + i40evf_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* assign the mask for this irq */ - irq_set_affinity_hint( - adapter->msix_entries[vector + NONQ_VECS].vector, - q_vector->affinity_mask); + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } return 0; @@ -554,11 +585,10 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) free_queue_irqs: while (vector) { vector--; - irq_set_affinity_hint( - adapter->msix_entries[vector + NONQ_VECS].vector, - NULL); - free_irq(adapter->msix_entries[vector + NONQ_VECS].vector, - &adapter->q_vectors[vector]); + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &adapter->q_vectors[vector]); } return err; } @@ -599,16 +629,15 @@ static int i40evf_request_misc_irq(struct i40evf_adapter *adapter) **/ static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter) { - int i; - int q_vectors; + int vector, irq_num, q_vectors; q_vectors = adapter->num_msix_vectors - NONQ_VECS; - for (i = 0; i < q_vectors; i++) { - irq_set_affinity_hint(adapter->msix_entries[i+1].vector, - NULL); - free_irq(adapter->msix_entries[i+1].vector, - &adapter->q_vectors[i]); + for (vector = 0; vector < q_vectors; vector++) { + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &adapter->q_vectors[vector]); } } @@ -1717,15 +1746,17 @@ static void i40evf_reset_task(struct work_struct *work) /* wait until the reset is complete and the PF is responding to us */ for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) { + /* sleep first to make sure a minimum wait time is met */ + msleep(I40EVF_RESET_WAIT_MS); + reg_val = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if (reg_val == I40E_VFR_VFACTIVE) break; - msleep(I40EVF_RESET_WAIT_MS); } + pci_set_master(adapter->pdev); - /* extra wait to make sure minimum wait is met */ - msleep(I40EVF_RESET_WAIT_MS); + if (i == I40EVF_RESET_WAIT_COUNT) { struct i40evf_mac_filter *ftmp; struct i40evf_vlan_filter *fv, *fvtmp; @@ -2133,10 +2164,6 @@ static struct net_device_stats *i40evf_get_stats(struct net_device *netdev) static int i40evf_change_mtu(struct net_device *netdev, int new_mtu) { struct i40evf_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - - if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER)) - return -EINVAL; netdev->mtu = new_mtu; adapter->flags |= I40EVF_FLAG_RESET_NEEDED; @@ -2424,6 +2451,10 @@ static void i40evf_init_task(struct work_struct *work) i40evf_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; + /* MTU range: 68 - 9710 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = I40E_MAX_RXBUFFER - (ETH_HLEN + ETH_FCS_LEN); + if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", adapter->hw.mac.addr); diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 2688180a7acd..8aee314332a8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -357,7 +357,8 @@ #define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ -#define MAX_JUMBO_FRAME_SIZE 0x2600 +#define MAX_JUMBO_FRAME_SIZE 0x2600 +#define MAX_STD_JUMBO_FRAME_SIZE 9216 /* PBA constants */ #define E1000_PBA_34K 0x0022 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index edc9a6ac5169..4feca69e5833 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2468,6 +2468,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 68 - 9216 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; + adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); /* before reading the NVM, reset the controller to put the device in a @@ -5408,17 +5412,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) struct pci_dev *pdev = adapter->pdev; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { - dev_err(&pdev->dev, "Invalid MTU setting\n"); - return -EINVAL; - } - -#define MAX_STD_JUMBO_FRAME_SIZE 9238 - if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { - dev_err(&pdev->dev, "MTU > 9216 not supported.\n"); - return -EINVAL; - } - /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index a7895c4cbcc3..c30eea8399a7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -226,7 +226,7 @@ static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfine_82580(struct ptp_clock_info *ptp, long scaled_ppm) { struct igb_adapter *igb = container_of(ptp, struct igb_adapter, ptp_caps); @@ -235,13 +235,13 @@ static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) u64 rate; u32 inca; - if (ppb < 0) { + if (scaled_ppm < 0) { neg_adj = 1; - ppb = -ppb; + scaled_ppm = -scaled_ppm; } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); + rate = scaled_ppm; + rate <<= 13; + rate = div_u64(rate, 15625); inca = rate & INCVALUE_MASK; if (neg_adj) @@ -1103,7 +1103,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.max_adj = 62499999; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576; adapter->ptp_caps.settime64 = igb_ptp_settime_82576; @@ -1131,7 +1131,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.n_pins = IGB_N_SDP; adapter->ptp_caps.pps = 1; adapter->ptp_caps.pin_config = adapter->sdp_config; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210; adapter->ptp_caps.settime64 = igb_ptp_settime_i210; diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h index ee1ef08d7fc4..f1789d192e24 100644 --- a/drivers/net/ethernet/intel/igbvf/defines.h +++ b/drivers/net/ethernet/intel/igbvf/defines.h @@ -85,7 +85,8 @@ #define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ #define E1000_TXD_STAT_DD 0x00000001 /* Desc Done */ -#define MAX_JUMBO_FRAME_SIZE 0x3F00 +#define MAX_JUMBO_FRAME_SIZE 0x3F00 +#define MAX_STD_JUMBO_FRAME_SIZE 9216 /* 802.1q VLAN Packet Size */ #define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 12bb877df860..810fcf7aa2c6 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2356,16 +2356,6 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu) struct igbvf_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - if (new_mtu < 68 || new_mtu > INT_MAX - ETH_HLEN - ETH_FCS_LEN || - max_frame > MAX_JUMBO_FRAME_SIZE) - return -EINVAL; - -#define MAX_STD_JUMBO_FRAME_SIZE 9234 - if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { - dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n"); - return -EINVAL; - } - while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state)) usleep_range(1000, 2000); /* igbvf_down has a dependency on max_frame_size */ @@ -2786,6 +2776,10 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX; + /* MTU range: 68 - 9216 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; + /*reset the controller to put the device in a known good state */ err = hw->mac.ops.reset_hw(hw); if (err) { diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 31f91459312f..5826b1ddedcf 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -487,6 +487,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->vlan_features |= NETIF_F_HIGHDMA; } + /* MTU range: 68 - 16114 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = IXGB_MAX_JUMBO_FRAME_SIZE - ETH_HLEN; + /* make sure the EEPROM is good */ if (!ixgb_validate_eeprom_checksum(&adapter->hw)) { @@ -1619,18 +1623,6 @@ ixgb_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgb_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH; - int old_max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH; - - /* MTU < 68 is an error for IPv4 traffic, just don't allow it */ - if ((new_mtu < 68) || - (max_frame > IXGB_MAX_JUMBO_FRAME_SIZE + ENET_FCS_LENGTH)) { - netif_err(adapter, probe, adapter->netdev, - "Invalid MTU setting %d\n", new_mtu); - return -EINVAL; - } - - if (old_max_frame == max_frame) - return 0; if (netif_running(netdev)) ixgb_down(adapter, true); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b06e32d0d22a..ef81c3d8c295 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -1027,4 +1027,6 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_ring *tx_ring); u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); +s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index fb51be74dd4c..805ab319e578 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -367,7 +367,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) } /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); + hw->mac.ops.fc_autoneg(hw); /* Disable any previous flow control settings */ fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); @@ -1179,6 +1179,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = { .get_link_capabilities = &ixgbe_get_link_capabilities_82598, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_generic, .blink_led_stop = &ixgbe_blink_led_stop_generic, .set_rar = &ixgbe_set_rar_generic, @@ -1193,6 +1194,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = { .set_vfta = &ixgbe_set_vfta_82598, .fc_enable = &ixgbe_fc_enable_82598, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = NULL, .acquire_swfw_sync = &ixgbe_acquire_swfw_sync, .release_swfw_sync = &ixgbe_release_swfw_sync, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 63b25006ac90..e00aaeb91827 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -2204,6 +2204,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = { .get_link_capabilities = &ixgbe_get_link_capabilities_82599, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_generic, .blink_led_stop = &ixgbe_blink_led_stop_generic, .set_rar = &ixgbe_set_rar_generic, @@ -2219,6 +2220,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = { .set_vfta = &ixgbe_set_vfta_generic, .fc_enable = &ixgbe_fc_enable_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic, .init_uta_tables = &ixgbe_init_uta_tables_generic, .setup_sfp = &ixgbe_setup_sfp_modules_82599, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 77d3039283f6..8832df3eba25 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -298,10 +298,12 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); IXGBE_WRITE_FLUSH(hw); - /* Setup flow control */ - ret_val = hw->mac.ops.setup_fc(hw); - if (ret_val) - return ret_val; + /* Setup flow control if method for doing so */ + if (hw->mac.ops.setup_fc) { + ret_val = hw->mac.ops.setup_fc(hw); + if (ret_val) + return ret_val; + } /* Cashe bit indicating need for crosstalk fix */ switch (hw->mac.type) { @@ -390,6 +392,9 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) status = hw->mac.ops.start_hw(hw); } + /* Initialize the LED link active for LED blink support */ + hw->mac.ops.init_led_link_act(hw); + return status; } @@ -773,6 +778,49 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) } /** + * ixgbe_init_led_link_act_generic - Store the LED index link/activity. + * @hw: pointer to hardware structure + * + * Store the index for the link active LED. This will be used to support + * blinking the LED. + **/ +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 led_reg, led_mode; + u16 i; + + led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* Get LED link active from the LEDCTL register */ + for (i = 0; i < 4; i++) { + led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i); + + if ((led_mode & IXGBE_LED_MODE_MASK_BASE) == + IXGBE_LED_LINK_ACTIVE) { + mac->led_link_act = i; + return 0; + } + } + + /* If LEDCTL register does not have the LED link active set, then use + * known MAC defaults. + */ + switch (hw->mac.type) { + case ixgbe_mac_x550em_a: + mac->led_link_act = 0; + break; + case ixgbe_mac_X550EM_x: + mac->led_link_act = 1; + break; + default: + mac->led_link_act = 2; + } + + return 0; +} + +/** * ixgbe_led_on_generic - Turns on the software controllable LEDs. * @hw: pointer to hardware structure * @index: led number to turn on @@ -2127,7 +2175,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) } /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); + hw->mac.ops.fc_autoneg(hw); /* Disable any previous flow control settings */ mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN); @@ -2231,8 +2279,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) * Find the intersection between advertised settings and link partner's * advertised settings **/ -static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, - u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) +s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) return IXGBE_ERR_FC_NOT_NEGOTIATED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 6d4c260d0cbd..5b3e3c65927e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -49,6 +49,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw); s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f49f80380aa5..fd192bf29b26 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2225,11 +2225,11 @@ static int ixgbe_set_phys_id(struct net_device *netdev, return 2; case ETHTOOL_ID_ON: - hw->mac.ops.led_on(hw, hw->bus.func); + hw->mac.ops.led_on(hw, hw->mac.led_link_act); break; case ETHTOOL_ID_OFF: - hw->mac.ops.led_off(hw, hw->bus.func); + hw->mac.ops.led_off(hw, hw->mac.led_link_act); break; case ETHTOOL_ID_INACTIVE: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bd93d823cc25..2436984481cc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -54,6 +54,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/vxlan.h> #include "ixgbe.h" #include "ixgbe_common.h" @@ -3070,6 +3071,9 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) return; } + if (!adapter->msix_entries) + return; + for (vector = 0; vector < adapter->num_q_vectors; vector++) { struct ixgbe_q_vector *q_vector = adapter->q_vector[vector]; struct msix_entry *entry = &adapter->msix_entries[vector]; @@ -5012,24 +5016,23 @@ fwd_queue_err: return err; } -static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) +static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) { - struct net_device *upper; - struct list_head *iter; - int err; + if (netif_is_macvlan(upper)) { + struct macvlan_dev *dfwd = netdev_priv(upper); + struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; - - if (dfwd->fwd_priv) { - err = ixgbe_fwd_ring_up(upper, vadapter); - if (err) - continue; - } - } + if (dfwd->fwd_priv) + ixgbe_fwd_ring_up(upper, vadapter); } + + return 0; +} + +static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) +{ + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_upper_dev_walk, NULL); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -5448,12 +5451,25 @@ static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) spin_unlock(&adapter->fdir_perfect_lock); } +static int ixgbe_disable_macvlan(struct net_device *upper, void *data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) { + netif_tx_stop_all_queues(upper); + netif_carrier_off(upper); + netif_tx_disable(upper); + } + } + + return 0; +} + void ixgbe_down(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *upper; - struct list_head *iter; int i; /* signal that we are down to the interrupt handler */ @@ -5477,17 +5493,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) netif_tx_disable(netdev); /* disable any upper devices */ - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *vlan = netdev_priv(upper); - - if (vlan->fwd_priv) { - netif_tx_stop_all_queues(upper); - netif_carrier_off(upper); - netif_tx_disable(upper); - } - } - } + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_disable_macvlan, NULL); ixgbe_irq_disable(adapter); @@ -5618,7 +5625,8 @@ static void ixgbe_init_dcb(struct ixgbe_adapter *adapter) * Fields are initialized based on PCI device information and * OS network device settings (MTU size). **/ -static int ixgbe_sw_init(struct ixgbe_adapter *adapter) +static int ixgbe_sw_init(struct ixgbe_adapter *adapter, + const struct ixgbe_info *ii) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; @@ -5634,6 +5642,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->subsystem_vendor_id = pdev->subsystem_vendor; hw->subsystem_device_id = pdev->subsystem_device; + /* get_invariants needs the device IDs */ + ii->get_invariants(hw); + /* Set common capability flags and settings */ rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus()); adapter->ring_feature[RING_F_RSS].limit = rss; @@ -6049,11 +6060,6 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - - /* MTU < 68 is an error and causes problems on some kernels */ - if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) - return -EINVAL; /* * For 82599EB we cannot allow legacy VFs to enable their receive @@ -6062,7 +6068,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) */ if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (adapter->hw.mac.type == ixgbe_mac_82599EB) && - (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))) + (new_mtu > ETH_DATA_LEN)) e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n"); e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -6728,6 +6734,18 @@ static void ixgbe_update_default_up(struct ixgbe_adapter *adapter) #endif } +static int ixgbe_enable_macvlan(struct net_device *upper, void *data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) + netif_tx_wake_all_queues(upper); + } + + return 0; +} + /** * ixgbe_watchdog_link_is_up - update netif_carrier status and * print link up message @@ -6737,8 +6755,6 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *upper; - struct list_head *iter; u32 link_speed = adapter->link_speed; const char *speed_str; bool flow_rx, flow_tx; @@ -6809,14 +6825,8 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) /* enable any upper devices */ rtnl_lock(); - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *vlan = netdev_priv(upper); - - if (vlan->fwd_priv) - netif_tx_wake_all_queues(upper); - } - } + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_enable_macvlan, NULL); rtnl_unlock(); /* update the default user priority for VFs */ @@ -7651,11 +7661,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring, /* snag network header to get L4 type and address */ skb = first->skb; hdr.network = skb_network_header(skb); + if (unlikely(hdr.network <= skb->data)) + return; if (skb->encapsulation && first->protocol == htons(ETH_P_IP) && - hdr.ipv4->protocol != IPPROTO_UDP) { + hdr.ipv4->protocol == IPPROTO_UDP) { struct ixgbe_adapter *adapter = q_vector->adapter; + if (unlikely(skb_tail_pointer(skb) < hdr.network + + VXLAN_HEADROOM)) + return; + /* verify the port is recognized as VXLAN */ if (adapter->vxlan_port && udp_hdr(skb)->dest == adapter->vxlan_port) @@ -7666,6 +7682,12 @@ static void ixgbe_atr(struct ixgbe_ring *ring, hdr.network = skb_inner_network_header(skb); } + /* Make sure we have at least [minimum IPv4 header + TCP] + * or [IPv6 header] bytes + */ + if (unlikely(skb_tail_pointer(skb) < hdr.network + 40)) + return; + /* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { case IPVERSION: @@ -7685,6 +7707,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, if (l4_proto != IPPROTO_TCP) return; + if (unlikely(skb_tail_pointer(skb) < hdr.network + + hlen + sizeof(struct tcphdr))) + return; + th = (struct tcphdr *)(hdr.network + hlen); /* skip this packet since the socket is closing */ @@ -8350,12 +8376,38 @@ static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, } #ifdef CONFIG_NET_CLS_ACT +struct upper_walk_data { + struct ixgbe_adapter *adapter; + u64 action; + int ifindex; + u8 queue; +}; + +static int get_macvlan_queue(struct net_device *upper, void *_data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *dfwd = netdev_priv(upper); + struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct upper_walk_data *data = _data; + struct ixgbe_adapter *adapter = data->adapter; + int ifindex = data->ifindex; + + if (vadapter && vadapter->netdev->ifindex == ifindex) { + data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; + data->action = data->queue; + return 1; + } + } + + return 0; +} + static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, u8 *queue, u64 *action) { unsigned int num_vfs = adapter->num_vfs, vf; + struct upper_walk_data data; struct net_device *upper; - struct list_head *iter; /* redirect to a SRIOV VF */ for (vf = 0; vf < num_vfs; ++vf) { @@ -8373,17 +8425,16 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, } /* redirect to a offloaded macvlan netdev */ - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; - - if (vadapter && vadapter->netdev->ifindex == ifindex) { - *queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; - *action = *queue; - return 0; - } - } + data.adapter = adapter; + data.ifindex = ifindex; + data.action = 0; + data.queue = 0; + if (netdev_walk_all_upper_dev_rcu(adapter->netdev, + get_macvlan_queue, &data)) { + *action = data.action; + *queue = data.queue; + + return 0; } return -EINVAL; @@ -8410,7 +8461,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, } /* Redirect to a VF or a offloaded macvlan */ - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); err = handle_redirect_action(adapter, ifindex, queue, @@ -9477,6 +9528,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->mac.ops = *ii->mac_ops; hw->mac.type = ii->mac; hw->mvals = ii->mvals; + if (ii->link_ops) + hw->link.ops = *ii->link_ops; /* EEPROM */ hw->eeprom.ops = *ii->eeprom_ops; @@ -9500,10 +9553,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->phy.mdio.mdio_read = ixgbe_mdio_read; hw->phy.mdio.mdio_write = ixgbe_mdio_write; - ii->get_invariants(hw); - /* setup the private structure */ - err = ixgbe_sw_init(adapter); + err = ixgbe_sw_init(adapter, ii); if (err) goto err_sw_init; @@ -9612,6 +9663,10 @@ skip_sriov: netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; + /* MTU range: 68 - 9710 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); + #ifdef CONFIG_IXGBE_DCB if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) netdev->dcbnl_ops = &dcbnl_ops; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 021ab9b89c71..3b8362085f57 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -109,8 +109,8 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2) * * Returns an error code on error. */ -static s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val, bool lock) +s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; int max_retry = 10; @@ -178,36 +178,6 @@ fail: } /** - * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation - * @hw: pointer to the hardware structure - * @addr: I2C bus address to read from - * @reg: I2C device register to read from - * @val: pointer to location to receive read value - * - * Returns an error code on error. - */ -s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val) -{ - return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true); -} - -/** - * ixgbe_read_i2c_combined_generic_unlocked - Unlocked I2C read combined - * @hw: pointer to the hardware structure - * @addr: I2C bus address to read from - * @reg: I2C device register to read from - * @val: pointer to location to receive read value - * - * Returns an error code on error. - */ -s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val) -{ - return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false); -} - -/** * ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation * @hw: pointer to the hardware structure * @addr: I2C bus address to write to @@ -217,8 +187,8 @@ s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, * * Returns an error code on error. */ -static s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val, bool lock) +s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; int max_retry = 1; @@ -273,33 +243,39 @@ fail: } /** - * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation - * @hw: pointer to the hardware structure - * @addr: I2C bus address to write to - * @reg: I2C device register to write to - * @val: value to write + * ixgbe_probe_phy - Probe a single address for a PHY + * @hw: pointer to hardware structure + * @phy_addr: PHY address to probe * - * Returns an error code on error. - */ -s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, - u8 addr, u16 reg, u16 val) + * Returns true if PHY found + **/ +static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) { - return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true); -} + u16 ext_ability = 0; -/** - * ixgbe_write_i2c_combined_generic_unlocked - Unlocked I2C write combined - * @hw: pointer to the hardware structure - * @addr: I2C bus address to write to - * @reg: I2C device register to write to - * @val: value to write - * - * Returns an error code on error. - */ -s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, - u8 addr, u16 reg, u16 val) -{ - return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false); + hw->phy.mdio.prtad = phy_addr; + if (mdio45_probe(&hw->phy.mdio, phy_addr) != 0) + return false; + + if (ixgbe_get_phy_id(hw)) + return false; + + hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, + MDIO_PMA_EXTABLE, + MDIO_MMD_PMAPMD, + &ext_ability); + if (ext_ability & + (MDIO_PMA_EXTABLE_10GBT | + MDIO_PMA_EXTABLE_1000BT)) + hw->phy.type = ixgbe_phy_cu_unknown; + else + hw->phy.type = ixgbe_phy_generic; + } + + return true; } /** @@ -311,7 +287,7 @@ s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { u32 phy_addr; - u16 ext_ability = 0; + u32 status = IXGBE_ERR_PHY_ADDR_INVALID; if (!hw->phy.phy_semaphore_mask) { if (hw->bus.lan_id) @@ -320,37 +296,34 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; } - if (hw->phy.type == ixgbe_phy_unknown) { - for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { - hw->phy.mdio.prtad = phy_addr; - if (mdio45_probe(&hw->phy.mdio, phy_addr) == 0) { - ixgbe_get_phy_id(hw); - hw->phy.type = - ixgbe_get_phy_type_from_id(hw->phy.id); - - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.ops.read_reg(hw, - MDIO_PMA_EXTABLE, - MDIO_MMD_PMAPMD, - &ext_ability); - if (ext_ability & - (MDIO_PMA_EXTABLE_10GBT | - MDIO_PMA_EXTABLE_1000BT)) - hw->phy.type = - ixgbe_phy_cu_unknown; - else - hw->phy.type = - ixgbe_phy_generic; - } + if (hw->phy.type != ixgbe_phy_unknown) + return 0; - return 0; - } + if (hw->phy.nw_mng_if_sel) { + phy_addr = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + if (ixgbe_probe_phy(hw, phy_addr)) + return 0; + else + return IXGBE_ERR_PHY_ADDR_INVALID; + } + + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_probe_phy(hw, phy_addr)) { + status = 0; + break; } - /* indicate no PHY found */ - hw->phy.mdio.prtad = MDIO_PRTAD_NONE; - return IXGBE_ERR_PHY_ADDR_INVALID; } - return 0; + + /* Certain media types do not have a phy so an address will not + * be found and the code will take this path. Caller has to + * decide if it is an error or not. + */ + if (status) + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; + + return status; } /** @@ -416,7 +389,8 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case TN1010_PHY_ID: phy_type = ixgbe_phy_tn; break; - case X550_PHY_ID: + case X550_PHY_ID2: + case X550_PHY_ID3: case X540_PHY_ID: phy_type = ixgbe_phy_aq; break; @@ -427,6 +401,7 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) phy_type = ixgbe_phy_nl; break; case X557_PHY_ID: + case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; default: @@ -477,8 +452,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) */ for (i = 0; i < 30; i++) { msleep(100); - hw->phy.ops.read_reg(hw, MDIO_CTRL1, - MDIO_MMD_PHYXS, &ctrl); + hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS, &ctrl); if (!(ctrl & MDIO_CTRL1_RESET)) { udelay(2); break; @@ -705,53 +679,52 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, - MDIO_MMD_AN, - &autoneg_reg); + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, &autoneg_reg); - autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G; - - hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, - MDIO_MMD_AN, - autoneg_reg); - } + autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) && + (speed & IXGBE_LINK_SPEED_10GB_FULL)) + autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G; - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - MDIO_MMD_AN, - &autoneg_reg); + hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, autoneg_reg); - autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + MDIO_MMD_AN, &autoneg_reg); - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - MDIO_MMD_AN, - autoneg_reg); + if (hw->mac.type == ixgbe_mac_X550) { + /* Set or unset auto-negotiation 5G advertisement */ + autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_5GB_FULL)) + autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; + + /* Set or unset auto-negotiation 2.5G advertisement */ + autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_2_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_2_5GB_FULL)) + autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; } - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, - MDIO_MMD_AN, - &autoneg_reg); + /* Set or unset auto-negotiation 1G advertisement */ + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) && + (speed & IXGBE_LINK_SPEED_1GB_FULL)) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; - autoneg_reg &= ~(ADVERTISE_100FULL | - ADVERTISE_100HALF); - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= ADVERTISE_100FULL; + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + MDIO_MMD_AN, autoneg_reg); - hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, - MDIO_MMD_AN, - autoneg_reg); - } + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, &autoneg_reg); + + autoneg_reg &= ~(ADVERTISE_100FULL | ADVERTISE_100HALF); + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) && + (speed & IXGBE_LINK_SPEED_100_FULL)) + autoneg_reg |= ADVERTISE_100FULL; + + hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, autoneg_reg); /* Blocked by MNG FW so don't reset PHY */ if (ixgbe_check_reset_blocked(hw)) @@ -830,6 +803,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; break; case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL; break; default: @@ -2396,9 +2370,7 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (!on && ixgbe_mng_present(hw)) return 0; - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - ®); + status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, ®); if (status) return status; @@ -2410,8 +2382,6 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) reg |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE; } - status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - reg); + status = hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, reg); return status; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index cc735ec3e045..ecf05f838fc5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -195,12 +195,8 @@ s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 *sff8472_data); s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data); -s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val); -s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val); -s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val); -s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val); +s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg, + u16 *val, bool lock); +s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg, + u16 val, bool lock); #endif /* _IXGBE_PHY_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 31d82e3abac8..cf21273db201 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -874,19 +874,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB 0x4 /* 1Gb/s */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB 0x6 /* 10Gb/s */ -#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */ #define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */ #define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */ -#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */ -#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/ #define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/ #define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/ #define IXGBE_MII_2_5GBASE_T_ADVERTISE 0x0400 #define IXGBE_MII_5GBASE_T_ADVERTISE 0x0800 -#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */ -#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */ #define IXGBE_MII_RESTART 0x200 -#define IXGBE_MII_AUTONEG_COMPLETE 0x20 #define IXGBE_MII_AUTONEG_LINK_UP 0x04 #define IXGBE_MII_AUTONEG_REG 0x0 @@ -1320,30 +1314,20 @@ struct ixgbe_thermal_sensor_data { /* MDIO definitions */ #define IXGBE_MDIO_ZERO_DEV_TYPE 0x0 -#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 #define IXGBE_MDIO_PCS_DEV_TYPE 0x3 -#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4 -#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */ #define IXGBE_TWINAX_DEV 1 #define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Control Reg */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0 - 10G, 1 - 1G */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010 -#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */ -#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT 0xC800 /* AUTO_NEG Vendor Status Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM 0xCC00 /* AUTO_NEG Vendor TX Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */ #define IXGBE_MDIO_AUTO_NEG_VEN_LSC 0x1 /* AUTO_NEG Vendor Tx LSC */ -#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */ -#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */ #define IXGBE_MDIO_AUTO_NEG_EEE_ADVT 0x3C /* AUTO_NEG EEE Advt Reg */ #define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ @@ -1393,8 +1377,10 @@ struct ixgbe_thermal_sensor_data { #define TN1010_PHY_ID 0x00A19410 #define TNX_FW_REV 0xB #define X540_PHY_ID 0x01540200 -#define X550_PHY_ID 0x01540220 +#define X550_PHY_ID2 0x01540223 +#define X550_PHY_ID3 0x01540221 #define X557_PHY_ID 0x01540240 +#define X557_PHY_ID2 0x01540250 #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 #define AQ_FW_REV 0x20 @@ -3352,6 +3338,7 @@ struct ixgbe_mac_operations { s32 (*led_off)(struct ixgbe_hw *, u32); s32 (*blink_led_start)(struct ixgbe_hw *, u32); s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + s32 (*init_led_link_act)(struct ixgbe_hw *); /* RAR, Multicast, VLAN */ s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); @@ -3372,6 +3359,7 @@ struct ixgbe_mac_operations { /* Flow Control */ s32 (*fc_enable)(struct ixgbe_hw *); s32 (*setup_fc)(struct ixgbe_hw *); + void (*fc_autoneg)(struct ixgbe_hw *); /* Manageability interface */ s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); @@ -3410,16 +3398,28 @@ struct ixgbe_phy_operations { s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *); s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - s32 (*read_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val); - s32 (*write_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val); s32 (*check_overtemp)(struct ixgbe_hw *); s32 (*set_phy_power)(struct ixgbe_hw *, bool on); s32 (*enter_lplu)(struct ixgbe_hw *); s32 (*handle_lasi)(struct ixgbe_hw *hw); - s32 (*read_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, - u16 *value); - s32 (*write_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, - u16 value); + s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, + u8 *value); + s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, + u8 value); +}; + +struct ixgbe_link_operations { + s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val); + s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, + u16 *val); + s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val); + s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, + u16 val); +}; + +struct ixgbe_link_info { + struct ixgbe_link_operations ops; + u8 addr; }; struct ixgbe_eeprom_info { @@ -3462,6 +3462,7 @@ struct ixgbe_mac_info { u8 san_mac_rar_index; struct ixgbe_thermal_sensor_data thermal_sensor_data; bool set_lben; + u8 led_link_act; }; struct ixgbe_phy_info { @@ -3523,6 +3524,7 @@ struct ixgbe_hw { struct ixgbe_addr_filter_info addr_ctrl; struct ixgbe_fc_info fc; struct ixgbe_phy_info phy; + struct ixgbe_link_info link; struct ixgbe_eeprom_info eeprom; struct ixgbe_bus_info bus; struct ixgbe_mbx_info mbx; @@ -3546,6 +3548,7 @@ struct ixgbe_info { const struct ixgbe_eeprom_operations *eeprom_ops; const struct ixgbe_phy_operations *phy_ops; const struct ixgbe_mbx_operations *mbx_ops; + const struct ixgbe_link_operations *link_ops; const u32 *mvals; }; @@ -3593,17 +3596,35 @@ struct ixgbe_info { #define IXGBE_FUSES0_REV_MASK (3u << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) +#define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) #define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C) #define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248) #define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C) #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634) #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638) #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P) ((P) ? 0x8B00 : 0x4B00) #define IXGBE_KRM_PMD_DFX_BURNIN(P) ((P) ? 0x8E00 : 0x4E00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054) #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 0x9A00 : 0x5A00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR BIT(20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR (0x2 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN BIT(25) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN BIT(26) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN BIT(27) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M ~(0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M BIT(28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G (0x2 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G (0x3 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN (0x4 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART BIT(31) + #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B BIT(9) #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS BIT(11) @@ -3618,6 +3639,7 @@ struct ixgbe_info { #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR BIT(18) #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX BIT(24) #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR BIT(26) +#define IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE BIT(28) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE BIT(29) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART BIT(31) @@ -3627,6 +3649,8 @@ struct ixgbe_info { #define IXGBE_KRM_AN_CNTL_8_LINEAR BIT(0) #define IXGBE_KRM_AN_CNTL_8_LIMITING BIT(1) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE BIT(10) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE BIT(11) #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D BIT(12) #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D BIT(19) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index f2b1d48a16c3..e2ff823ee202 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -851,6 +851,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = { .get_link_capabilities = &ixgbe_get_copper_link_capabilities_generic, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_X540, .blink_led_stop = &ixgbe_blink_led_stop_X540, .set_rar = &ixgbe_set_rar_generic, @@ -866,6 +867,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = { .set_vfta = &ixgbe_set_vfta_generic, .fc_enable = &ixgbe_fc_enable_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic, .init_uta_tables = &ixgbe_init_uta_tables_generic, .setup_sfp = NULL, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 7e6b9267ca9d..11fb433eb924 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -28,11 +28,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed); static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *); +static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *); +static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *); +static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *); static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_link_info *link = &hw->link; + + /* Start with X540 invariants, since so simular */ + ixgbe_get_invariants_X540(hw); + + if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper) + phy->ops.set_phy_power = NULL; + + link->addr = IXGBE_CS4227; + + return 0; +} + +static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; /* Start with X540 invariants, since so simular */ ixgbe_get_invariants_X540(hw); @@ -69,8 +89,7 @@ static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw) */ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value) { - return hw->phy.ops.read_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, - value); + return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value); } /** @@ -83,8 +102,7 @@ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value) */ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value) { - return hw->phy.ops.write_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, - value); + return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value); } /** @@ -322,6 +340,68 @@ static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_NOT_IMPLEMENTED; } +/** + * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to read from + * @reg: I2C device register to read from + * @val: pointer to location to receive read value + * + * Returns an error code on error. + **/ +static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val) +{ + return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true); +} + +/** + * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to read from + * @reg: I2C device register to read from + * @val: pointer to location to receive read value + * + * Returns an error code on error. + **/ +static s32 +ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val) +{ + return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false); +} + +/** + * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to write to + * @reg: I2C device register to write to + * @val: value to write + * + * Returns an error code on error. + **/ +static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, + u8 addr, u16 reg, u16 val) +{ + return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true); +} + +/** + * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to write to + * @reg: I2C device register to write to + * @val: value to write + * + * Returns an error code on error. + **/ +static s32 +ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, + u8 addr, u16 reg, u16 val) +{ + return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false); +} + /** ixgbe_init_eeprom_params_X550 - Initialize EEPROM params * @hw: pointer to hardware structure * @@ -1128,47 +1208,17 @@ out: return ret; } -/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. +/** + * ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration * @hw: pointer to hardware structure - * @speed: the link speed to force * - * Configures the integrated KR PHY to use iXFI mode. Used to connect an - * internal and external PHY at a specific speed, without autonegotiation. + * iXfI configuration needed for ixgbe_mac_X550EM_x devices. **/ -static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) { s32 status; u32 reg_val; - /* Disable AN and force speed to 10G Serial. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - if (status) - return status; - - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; - - /* Select forced link speed for internal PHY. */ - switch (*speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; - break; - default: - /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; - } - - status = ixgbe_write_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - if (status) - return status; - /* Disable training protocol FSM. */ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), @@ -1228,20 +1278,106 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - if (status) + return status; +} + +/** + * ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the + * internal PHY + * @hw: pointer to hardware structure + **/ +static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw) +{ + s32 status; + u32 link_ctrl; + + /* Restart auto-negotiation. */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); + + if (status) { + hw_dbg(hw, "Auto-negotiation did not complete\n"); return status; + } - /* Toggle port SW reset by AN reset. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + + if (hw->mac.type == ixgbe_mac_x550em_a) { + u32 flx_mask_st20; + + /* Indicate to FW that AN restart has been asserted */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20); + + if (status) { + hw_dbg(hw, "Auto-negotiation did not complete\n"); + return status; + } + + flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20); + } + + return status; +} + +/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated KR PHY to use iXFI mode. Used to connect an + * internal and external PHY at a specific speed, without autonegotiation. + **/ +static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + s32 status; + u32 reg_val; + + /* Disable AN and force speed to 10G Serial. */ + status = ixgbe_read_iosf_sb_reg_x550(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status) return status; - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal KR PHY. */ + return IXGBE_ERR_LINK_SETUP; + } + status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + if (status) + return status; + + /* Additional configuration needed for x550em_x */ + if (hw->mac.type == ixgbe_mac_X550EM_x) { + status = ixgbe_setup_ixfi_x550em_x(hw); + if (status) + return status; + } + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -1292,7 +1428,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, __always_unused bool autoneg_wait_to_complete) { s32 status; - u16 slice, value; + u16 reg_slice, reg_val; bool setup_linear = false; /* Check if SFP module is supported and linear */ @@ -1308,71 +1444,68 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, if (status) return status; - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - /* Configure CS4227 LINE side to 10G SR. */ - slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); - value = IXGBE_CS4227_SPEED_10G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + /* Configure internal PHY for KR/KX. */ + ixgbe_setup_kr_speed_x550em(hw, speed); - slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; - - /* Configure CS4227 for HOST connection rate then type. */ - slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); - value = speed & IXGBE_LINK_SPEED_10GB_FULL ? - IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + /* Configure CS4227 LINE side to proper mode. */ + reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1; + else + reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1; - slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + status = hw->link.ops.write_link(hw, hw->link.addr, reg_slice, + reg_val); - /* Setup XFI internal link. */ - status = ixgbe_setup_ixfi_x550em(hw, &speed); - if (status) { - hw_dbg(hw, "setup_ixfi failed with %d\n", status); - return status; - } - } else { - /* Configure internal PHY for KR/KX. */ - status = ixgbe_setup_kr_speed_x550em(hw, speed); - if (status) { - hw_dbg(hw, "setup_kr_speed failed with %d\n", status); - return status; - } + return status; +} - /* Configure CS4227 LINE side to proper mode. */ - slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; +/** + * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated PHY for native SFI mode. Used to connect the + * internal PHY directly to an SFP cage, without autonegotiation. + **/ +static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 status; + u32 reg_val; + + /* Disable all AN and force speed to 10G Serial. */ + status = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + if (status) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal PHY. */ + return IXGBE_ERR_LINK_SETUP; } - return 0; + status = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); -i2c_err: - hw_dbg(hw, "combined i2c access failed with %d\n", status); return status; } @@ -1388,45 +1521,39 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed, { bool setup_linear = false; u32 reg_phy_int; - s32 rc; + s32 ret_val; /* Check if SFP module is supported and linear */ - rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); + ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (rc == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) return 0; - if (!rc) - return rc; + if (!ret_val) + return ret_val; - /* Configure internal PHY for native SFI */ - rc = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - ®_phy_int); - if (rc) - return rc; + /* Configure internal PHY for native SFI based on module type */ + ret_val = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); + if (!ret_val) + return ret_val; - if (setup_linear) { - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR; - } else { - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR; - } + reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA; + if (!setup_linear) + reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR; - rc = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - reg_phy_int); - if (rc) - return rc; + ret_val = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); + if (!ret_val) + return ret_val; - /* Setup XFI/SFI internal link */ - return ixgbe_setup_ixfi_x550em(hw, &speed); + /* Setup SFI internal link. */ + return ixgbe_setup_sfi_x550a(hw, &speed); } /** @@ -1442,19 +1569,19 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, u32 reg_slice, slice_offset; bool setup_linear = false; u16 reg_phy_ext; - s32 rc; + s32 ret_val; /* Check if SFP module is supported and linear */ - rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); + ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (rc == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) return 0; - if (!rc) - return rc; + if (!ret_val) + return ret_val; /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); @@ -1463,10 +1590,10 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */ - rc = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, + ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); - if (rc) - return rc; + if (ret_val) + return ret_val; /* When configuring quad port CS4223, the MAC instance is part * of the slice offset. @@ -1538,7 +1665,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, bool link_up_wait_to_complete) { u32 status; - u16 autoneg_status; + u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; @@ -1550,14 +1677,18 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, if (status || !(*link_up)) return status; - /* MAC link is up, so check external PHY link. - * Read this twice back to back to indicate current status. - */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); - if (status) - return status; + /* MAC link is up, so check external PHY link. + * Link status is latching low, and can only be used to detect link + * drop, and not the current status of the link without performing + * back-to-back reads. + */ + for (i = 0; i < 2; i++) { + status = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, + &autoneg_status); + + if (status) + return status; + } /* If external PHY link is not up, then indicate link not up */ if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS)) @@ -1575,7 +1706,7 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed, __always_unused bool autoneg_wait_to_complete) { struct ixgbe_mac_info *mac = &hw->mac; - u32 lval, sval; + u32 lval, sval, flx_val; s32 rc; rc = mac->ops.read_iosf_sb_reg(hw, @@ -1609,14 +1740,55 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed, if (rc) return rc; - lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + rc = mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + rc = ixgbe_restart_an_internal_phy_x550em(hw); return rc; } +/** ixgbe_init_mac_link_ops_X550em_a - Init mac link function pointers + * @hw: pointer to hardware structure + **/ +static void ixgbe_init_mac_link_ops_X550em_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + + switch (mac->ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + mac->ops.setup_fc = NULL; + mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a; + break; + case ixgbe_media_type_backplane: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a; + mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a; + break; + default: + break; + } +} + /** ixgbe_init_mac_link_ops_X550em - init mac link function pointers * @hw: pointer to hardware structure **/ @@ -1664,6 +1836,10 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw) default: break; } + + /* Additional modification for X550em_a devices */ + if (hw->mac.type == ixgbe_mac_x550em_a) + ixgbe_init_mac_link_ops_X550em_a(hw); } /** ixgbe_setup_sfp_modules_X550em - Setup SFP module @@ -1740,7 +1916,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status || !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN)) @@ -1748,7 +1924,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status || !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN | @@ -1757,7 +1933,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) @@ -1772,7 +1948,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { /* device fault alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1787,14 +1963,14 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status || !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT)) return status; /* link connect/disconnect event occurred */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status) return status; @@ -1826,20 +2002,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) /* Enable link status change alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status) return status; reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); + MDIO_MMD_AN, reg); if (status) return status; /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1848,14 +2024,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; /* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1864,14 +2040,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) IXGBE_MDIO_GLOBAL_ALARM_1_INT); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; /* Enable chip-wide vendor alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1879,7 +2055,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN; status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); return status; @@ -1945,13 +2121,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_1GB_FULL) reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX; - /* Restart auto-negotiation. */ - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; status = hw->mac.ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - return status; + if (hw->mac.type == ixgbe_mac_x550em_a) { + /* Set lane mode to KR auto negotiation */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + + if (status) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + } + + return ixgbe_restart_an_internal_phy_x550em(hw); } /** ixgbe_setup_kx4_x550em - Configure the KX4 PHY. @@ -2020,14 +2214,12 @@ static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up) *link_up = false; /* read this twice back to back to indicate current status */ - ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, &autoneg_status); if (ret) return ret; - ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, &autoneg_status); if (ret) return ret; @@ -2073,7 +2265,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) return 0; status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &speed); if (status) return status; @@ -2134,10 +2326,10 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + MDIO_MMD_VEND1, &phy_data); phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK; hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + MDIO_MMD_VEND1, phy_data); return 0; } @@ -2156,10 +2348,10 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + MDIO_MMD_VEND1, &phy_data); phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK; hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + MDIO_MMD_VEND1, phy_data); return 0; } @@ -2180,7 +2372,7 @@ static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw, *lcd_speed = IXGBE_LINK_SPEED_UNKNOWN; status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &an_lp_status); if (status) return status; @@ -2281,6 +2473,90 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) return rc; } +/** + * ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + **/ +static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) +{ + u32 link_s1, lp_an_page_low, an_cntl_1; + s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; + ixgbe_link_speed speed; + bool link_up; + + /* AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. + */ + if (hw->fc.disable_fc_autoneg) { + hw_err(hw, "Flow control autoneg is disabled"); + goto out; + } + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) { + hw_err(hw, "The link is down"); + goto out; + } + + /* Check at auto-negotiation has completed */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_S1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); + + if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + + /* Read the 10g AN autoc and LP ability registers and resolve + * local flow control settings accordingly + */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + goto out; + } + + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + goto out; + } + + status = ixgbe_negotiate_fc(hw, an_cntl_1, lp_an_page_low, + IXGBE_KRM_AN_CNTL_1_SYM_PAUSE, + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE, + IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE, + IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE); + +out: + if (!status) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings + * @hw: pointer to hardware structure + **/ +static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw) +{ + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; +} + /** ixgbe_enter_lplu_x550em - Transition to low power states * @hw: pointer to hardware structure * @@ -2327,7 +2603,7 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) return ixgbe_set_copper_phy_power(hw, false); status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &speed); if (status) return status; @@ -2349,20 +2625,20 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) /* Clear AN completed indication */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &autoneg_reg); if (status) return status; - status = hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + status = hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, + MDIO_MMD_AN, &an_10g_cntl_reg); if (status) return status; status = hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &autoneg_reg); if (status) return status; @@ -2520,7 +2796,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_TX_VENDOR_ALARMS_3, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, + MDIO_MMD_PMAPMD, ®); if (status) return status; @@ -2531,7 +2807,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) { status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_RES_PR_10, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -2540,7 +2816,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_RES_PR_10, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; @@ -2729,6 +3005,90 @@ static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, } /** + * ixgbe_setup_fc_backplane_x550em_a - Set up flow control + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + **/ +static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 an_cntl = 0; + + /* Validate the requested mode */ + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + return IXGBE_ERR_INVALID_LINK_SETTINGS; + } + + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + /* Set up the 1G and 10G flow control advertisement registers so the + * HW will be able to do FC autoneg once the cable is plugged in. If + * we link at 10G, the 1G advertisement is harmless and vice versa. + */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + return status; + } + + /* The possible values of fc.requested_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: + /* Flow control completely disabled by software override. */ + an_cntl &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); + break; + case ixgbe_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + an_cntl |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + an_cntl &= ~IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; + break; + case ixgbe_fc_rx_pause: + /* Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE, as such we fall + * through to the fc_full statement. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + break; + default: + hw_err(hw, "Flow control param set incorrectly\n"); + return IXGBE_ERR_CONFIG; + } + + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); + + /* Restart auto-negotiation. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); + + return status; +} + +/** * ixgbe_set_mux - Set mux for port 1 access with CS4227 * @hw: pointer to hardware structure * @state: set mux if 1, clear if 0 @@ -2934,6 +3294,7 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { X550_COMMON_MAC .led_on = ixgbe_led_on_generic, .led_off = ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = &ixgbe_reset_hw_X540, .get_media_type = &ixgbe_get_media_type_X540, .get_san_mac_addr = &ixgbe_get_san_mac_addr_generic, @@ -2948,12 +3309,14 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { .prot_autoc_read = prot_autoc_read_generic, .prot_autoc_write = prot_autoc_write_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, }; static const struct ixgbe_mac_operations mac_ops_X550EM_x = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = &ixgbe_reset_hw_X550em, .get_media_type = &ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2966,6 +3329,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = { .release_swfw_sync = &ixgbe_release_swfw_sync_X550em, .init_swfw_sync = &ixgbe_init_swfw_sync_X540, .setup_fc = NULL, /* defined later */ + .fc_autoneg = ixgbe_fc_autoneg, .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550, .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550, }; @@ -2974,6 +3338,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = ixgbe_reset_hw_X550em, .get_media_type = ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2985,6 +3350,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { .acquire_swfw_sync = ixgbe_acquire_swfw_sync_x550em_a, .release_swfw_sync = ixgbe_release_swfw_sync_x550em_a, .setup_fc = ixgbe_setup_fc_x550em, + .fc_autoneg = ixgbe_fc_autoneg, .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a, .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a, }; @@ -3036,11 +3402,6 @@ static const struct ixgbe_phy_operations phy_ops_X550EM_x = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_generic, .write_reg = &ixgbe_write_phy_reg_generic, - .read_i2c_combined = &ixgbe_read_i2c_combined_generic, - .write_i2c_combined = &ixgbe_write_i2c_combined_generic, - .read_i2c_combined_unlocked = &ixgbe_read_i2c_combined_generic_unlocked, - .write_i2c_combined_unlocked = - &ixgbe_write_i2c_combined_generic_unlocked, }; static const struct ixgbe_phy_operations phy_ops_x550em_a = { @@ -3053,6 +3414,13 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; +static const struct ixgbe_link_operations link_ops_x550em_x = { + .read_link = &ixgbe_read_i2c_combined_generic, + .read_link_unlocked = &ixgbe_read_i2c_combined_generic_unlocked, + .write_link = &ixgbe_write_i2c_combined_generic, + .write_link_unlocked = &ixgbe_write_i2c_combined_generic_unlocked, +}; + static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { IXGBE_MVALS_INIT(X550) }; @@ -3083,11 +3451,12 @@ const struct ixgbe_info ixgbe_X550EM_x_info = { .phy_ops = &phy_ops_X550EM_x, .mbx_ops = &mbx_ops_generic, .mvals = ixgbe_mvals_X550EM_x, + .link_ops = &link_ops_x550em_x, }; const struct ixgbe_info ixgbe_x550em_a_info = { .mac = ixgbe_mac_x550em_a, - .get_invariants = &ixgbe_get_invariants_X550_x, + .get_invariants = &ixgbe_get_invariants_X550_a, .mac_ops = &mac_ops_x550em_a, .eeprom_ops = &eeprom_ops_X550EM_x, .phy_ops = &phy_ops_x550em_a, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 7eaac3234049..d316f503a727 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1498,6 +1498,9 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter) { int i, q_vectors; + if (!adapter->msix_entries) + return; + q_vectors = adapter->num_msix_vectors; i = q_vectors - 1; @@ -2552,6 +2555,9 @@ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) **/ static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter) { + if (!adapter->msix_entries) + return; + pci_disable_msix(adapter->pdev); kfree(adapter->msix_entries); adapter->msix_entries = NULL; @@ -3742,24 +3748,8 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE; int ret; - switch (adapter->hw.api_version) { - case ixgbe_mbox_api_11: - case ixgbe_mbox_api_12: - max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; - break; - default: - if (adapter->hw.mac.type != ixgbe_mac_82599_vf) - max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; - break; - } - - /* MTU < 68 is an error and causes problems on some kernels */ - if ((new_mtu < 68) || (max_frame > max_possible_frame)) - return -EINVAL; - spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); @@ -3810,11 +3800,10 @@ static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) ixgbevf_free_irq(adapter); ixgbevf_free_all_tx_resources(adapter); ixgbevf_free_all_rx_resources(adapter); + ixgbevf_clear_interrupt_scheme(adapter); rtnl_unlock(); } - ixgbevf_clear_interrupt_scheme(adapter); - #ifdef CONFIG_PM retval = pci_save_state(pdev); if (retval) @@ -4104,6 +4093,23 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 68 - 1504 or 9710 */ + netdev->min_mtu = ETH_MIN_MTU; + switch (adapter->hw.api_version) { + case ixgbe_mbox_api_11: + case ixgbe_mbox_api_12: + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; + default: + if (adapter->hw.mac.type != ixgbe_mac_82599_vf) + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + else + netdev->max_mtu = ETH_DATA_LEN + ETH_FCS_LEN; + break; + } + if (IXGBE_REMOVED(hw->hw_addr)) { err = -EIO; goto err_sw_init; diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 836ebd8ee768..f9fcab54783c 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2357,14 +2357,6 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) { struct jme_adapter *jme = netdev_priv(netdev); - if (new_mtu == jme->old_mtu) - return 0; - - if (((new_mtu + ETH_HLEN) > MAX_ETHERNET_JUMBO_PACKET_SIZE) || - ((new_mtu) < IPV6_MIN_MTU)) - return -EINVAL; - - netdev->mtu = new_mtu; netdev_update_features(netdev); @@ -3063,6 +3055,10 @@ jme_init_one(struct pci_dev *pdev, if (using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* MTU range: 1280 - 9202*/ + netdev->min_mtu = IPV6_MIN_MTU; + netdev->max_mtu = MAX_ETHERNET_JUMBO_PACKET_SIZE - ETH_HLEN; + SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, netdev); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 1799fe1415df..cbeea915f026 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1085,7 +1085,6 @@ static const struct net_device_ops korina_netdev_ops = { .ndo_set_rx_mode = korina_multicast_list, .ndo_tx_timeout = korina_tx_timeout, .ndo_do_ioctl = korina_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 91e09d68b7e2..1a739d71f1c2 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -519,18 +519,16 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) static int ltq_etop_change_mtu(struct net_device *dev, int new_mtu) { - int ret = eth_change_mtu(dev, new_mtu); + struct ltq_etop_priv *priv = netdev_priv(dev); + unsigned long flags; - if (!ret) { - struct ltq_etop_priv *priv = netdev_priv(dev); - unsigned long flags; + dev->mtu = new_mtu; - spin_lock_irqsave(&priv->lock, flags); - ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu, - LTQ_ETOP_IGPLEN); - spin_unlock_irqrestore(&priv->lock, flags); - } - return ret; + spin_lock_irqsave(&priv->lock, flags); + ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu, LTQ_ETOP_IGPLEN); + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; } static int diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 5b12022adf1f..81b08d71c0f8 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -384,8 +384,6 @@ struct mv643xx_eth_private { struct net_device *dev; - struct phy_device *phy; - struct timer_list mib_counters_timer; spinlock_t mib_counters_lock; struct mib_counters mib_counters; @@ -1236,7 +1234,7 @@ static void mv643xx_eth_adjust_link(struct net_device *dev) DISABLE_AUTO_NEG_FOR_FLOW_CTRL | DISABLE_AUTO_NEG_FOR_DUPLEX; - if (mp->phy->autoneg == AUTONEG_ENABLE) { + if (dev->phydev->autoneg == AUTONEG_ENABLE) { /* enable auto negotiation */ pscr &= ~autoneg_disable; goto out_write; @@ -1244,7 +1242,7 @@ static void mv643xx_eth_adjust_link(struct net_device *dev) pscr |= autoneg_disable; - if (mp->phy->speed == SPEED_1000) { + if (dev->phydev->speed == SPEED_1000) { /* force gigabit, half duplex not supported */ pscr |= SET_GMII_SPEED_TO_1000; pscr |= SET_FULL_DUPLEX_MODE; @@ -1253,12 +1251,12 @@ static void mv643xx_eth_adjust_link(struct net_device *dev) pscr &= ~SET_GMII_SPEED_TO_1000; - if (mp->phy->speed == SPEED_100) + if (dev->phydev->speed == SPEED_100) pscr |= SET_MII_SPEED_TO_100; else pscr &= ~SET_MII_SPEED_TO_100; - if (mp->phy->duplex == DUPLEX_FULL) + if (dev->phydev->duplex == DUPLEX_FULL) pscr |= SET_FULL_DUPLEX_MODE; else pscr &= ~SET_FULL_DUPLEX_MODE; @@ -1499,55 +1497,69 @@ static const struct mv643xx_eth_stats mv643xx_eth_stats[] = { }; static int -mv643xx_eth_get_settings_phy(struct mv643xx_eth_private *mp, - struct ethtool_cmd *cmd) +mv643xx_eth_get_link_ksettings_phy(struct mv643xx_eth_private *mp, + struct ethtool_link_ksettings *cmd) { + struct net_device *dev = mp->dev; int err; + u32 supported, advertising; - err = phy_read_status(mp->phy); + err = phy_read_status(dev->phydev); if (err == 0) - err = phy_ethtool_gset(mp->phy, cmd); + err = phy_ethtool_ksettings_get(dev->phydev, cmd); /* * The MAC does not support 1000baseT_Half. */ - cmd->supported &= ~SUPPORTED_1000baseT_Half; - cmd->advertising &= ~ADVERTISED_1000baseT_Half; + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + supported &= ~SUPPORTED_1000baseT_Half; + advertising &= ~ADVERTISED_1000baseT_Half; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return err; } static int -mv643xx_eth_get_settings_phyless(struct mv643xx_eth_private *mp, - struct ethtool_cmd *cmd) +mv643xx_eth_get_link_ksettings_phyless(struct mv643xx_eth_private *mp, + struct ethtool_link_ksettings *cmd) { u32 port_status; + u32 supported, advertising; port_status = rdlp(mp, PORT_STATUS); - cmd->supported = SUPPORTED_MII; - cmd->advertising = ADVERTISED_MII; + supported = SUPPORTED_MII; + advertising = ADVERTISED_MII; switch (port_status & PORT_SPEED_MASK) { case PORT_SPEED_10: - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; break; case PORT_SPEED_100: - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; break; case PORT_SPEED_1000: - ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; break; default: - cmd->speed = -1; + cmd->base.speed = -1; break; } - cmd->duplex = (port_status & FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->port = PORT_MII; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; + cmd->base.duplex = (port_status & FULL_DUPLEX) ? + DUPLEX_FULL : DUPLEX_HALF; + cmd->base.port = PORT_MII; + cmd->base.phy_address = 0; + cmd->base.autoneg = AUTONEG_DISABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } @@ -1555,23 +1567,21 @@ mv643xx_eth_get_settings_phyless(struct mv643xx_eth_private *mp, static void mv643xx_eth_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { - struct mv643xx_eth_private *mp = netdev_priv(dev); wol->supported = 0; wol->wolopts = 0; - if (mp->phy) - phy_ethtool_get_wol(mp->phy, wol); + if (dev->phydev) + phy_ethtool_get_wol(dev->phydev, wol); } static int mv643xx_eth_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { - struct mv643xx_eth_private *mp = netdev_priv(dev); int err; - if (mp->phy == NULL) + if (!dev->phydev) return -EOPNOTSUPP; - err = phy_ethtool_set_wol(mp->phy, wol); + err = phy_ethtool_set_wol(dev->phydev, wol); /* Given that mv643xx_eth works without the marvell-specific PHY driver, * this debugging hint is useful to have. */ @@ -1581,31 +1591,38 @@ mv643xx_eth_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) } static int -mv643xx_eth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +mv643xx_eth_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct mv643xx_eth_private *mp = netdev_priv(dev); - if (mp->phy != NULL) - return mv643xx_eth_get_settings_phy(mp, cmd); + if (dev->phydev) + return mv643xx_eth_get_link_ksettings_phy(mp, cmd); else - return mv643xx_eth_get_settings_phyless(mp, cmd); + return mv643xx_eth_get_link_ksettings_phyless(mp, cmd); } static int -mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +mv643xx_eth_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { - struct mv643xx_eth_private *mp = netdev_priv(dev); + struct ethtool_link_ksettings c = *cmd; + u32 advertising; int ret; - if (mp->phy == NULL) + if (!dev->phydev) return -EINVAL; /* * The MAC does not support 1000baseT_Half. */ - cmd->advertising &= ~ADVERTISED_1000baseT_Half; + ethtool_convert_link_mode_to_legacy_u32(&advertising, + c.link_modes.advertising); + advertising &= ~ADVERTISED_1000baseT_Half; + ethtool_convert_legacy_u32_to_link_mode(c.link_modes.advertising, + advertising); - ret = phy_ethtool_sset(mp->phy, cmd); + ret = phy_ethtool_ksettings_set(dev->phydev, &c); if (!ret) mv643xx_eth_adjust_link(dev); return ret; @@ -1624,12 +1641,10 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev, static int mv643xx_eth_nway_reset(struct net_device *dev) { - struct mv643xx_eth_private *mp = netdev_priv(dev); - - if (mp->phy == NULL) + if (!dev->phydev) return -EINVAL; - return genphy_restart_aneg(mp->phy); + return genphy_restart_aneg(dev->phydev); } static int @@ -1754,8 +1769,6 @@ static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset) } static const struct ethtool_ops mv643xx_eth_ethtool_ops = { - .get_settings = mv643xx_eth_get_settings, - .set_settings = mv643xx_eth_set_settings, .get_drvinfo = mv643xx_eth_get_drvinfo, .nway_reset = mv643xx_eth_nway_reset, .get_link = ethtool_op_get_link, @@ -1769,6 +1782,8 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_wol = mv643xx_eth_get_wol, .set_wol = mv643xx_eth_set_wol, + .get_link_ksettings = mv643xx_eth_get_link_ksettings, + .set_link_ksettings = mv643xx_eth_set_link_ksettings, }; @@ -2328,19 +2343,21 @@ static inline void oom_timer_wrapper(unsigned long data) static void port_start(struct mv643xx_eth_private *mp) { + struct net_device *dev = mp->dev; u32 pscr; int i; /* * Perform PHY reset, if there is a PHY. */ - if (mp->phy != NULL) { - struct ethtool_cmd cmd; + if (dev->phydev) { + struct ethtool_link_ksettings cmd; - mv643xx_eth_get_settings(mp->dev, &cmd); - phy_init_hw(mp->phy); - mv643xx_eth_set_settings(mp->dev, &cmd); - phy_start(mp->phy); + mv643xx_eth_get_link_ksettings(dev, &cmd); + phy_init_hw(dev->phydev); + mv643xx_eth_set_link_ksettings( + dev, (const struct ethtool_link_ksettings *)&cmd); + phy_start(dev->phydev); } /* @@ -2352,7 +2369,7 @@ static void port_start(struct mv643xx_eth_private *mp) wrlp(mp, PORT_SERIAL_CONTROL, pscr); pscr |= DO_NOT_FORCE_LINK_FAIL; - if (mp->phy == NULL) + if (!dev->phydev) pscr |= FORCE_LINK_PASS; wrlp(mp, PORT_SERIAL_CONTROL, pscr); @@ -2536,8 +2553,8 @@ static int mv643xx_eth_stop(struct net_device *dev) del_timer_sync(&mp->rx_oom); netif_carrier_off(dev); - if (mp->phy) - phy_stop(mp->phy); + if (dev->phydev) + phy_stop(dev->phydev); free_irq(dev->irq, dev); port_reset(mp); @@ -2555,13 +2572,12 @@ static int mv643xx_eth_stop(struct net_device *dev) static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mv643xx_eth_private *mp = netdev_priv(dev); int ret; - if (mp->phy == NULL) + if (!dev->phydev) return -ENOTSUPP; - ret = phy_mii_ioctl(mp->phy, ifr, cmd); + ret = phy_mii_ioctl(dev->phydev, ifr, cmd); if (!ret) mv643xx_eth_adjust_link(dev); return ret; @@ -2571,9 +2587,6 @@ static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu) { struct mv643xx_eth_private *mp = netdev_priv(dev); - if (new_mtu < 64 || new_mtu > 9500) - return -EINVAL; - dev->mtu = new_mtu; mv643xx_eth_recalc_skb_size(mp); tx_set_rate(mp, 1000000000, 16777216); @@ -3024,7 +3037,8 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) { - struct phy_device *phy = mp->phy; + struct net_device *dev = mp->dev; + struct phy_device *phy = dev->phydev; if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; @@ -3042,6 +3056,7 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex) { + struct net_device *dev = mp->dev; u32 pscr; pscr = rdlp(mp, PORT_SERIAL_CONTROL); @@ -3051,7 +3066,7 @@ static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex) } pscr = MAX_RX_PACKET_9700BYTE | SERIAL_PORT_CONTROL_RESERVED; - if (mp->phy == NULL) { + if (!dev->phydev) { pscr |= DISABLE_AUTO_NEG_SPEED_GMII; if (speed == SPEED_1000) pscr |= SET_GMII_SPEED_TO_1000; @@ -3090,6 +3105,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) struct mv643xx_eth_platform_data *pd; struct mv643xx_eth_private *mp; struct net_device *dev; + struct phy_device *phydev = NULL; struct resource *res; int err; @@ -3146,18 +3162,18 @@ static int mv643xx_eth_probe(struct platform_device *pdev) err = 0; if (pd->phy_node) { - mp->phy = of_phy_connect(mp->dev, pd->phy_node, - mv643xx_eth_adjust_link, 0, - get_phy_mode(mp)); - if (!mp->phy) + phydev = of_phy_connect(mp->dev, pd->phy_node, + mv643xx_eth_adjust_link, 0, + get_phy_mode(mp)); + if (!phydev) err = -ENODEV; else - phy_addr_set(mp, mp->phy->mdio.addr); + phy_addr_set(mp, phydev->mdio.addr); } else if (pd->phy_addr != MV643XX_ETH_PHY_NONE) { - mp->phy = phy_scan(mp, pd->phy_addr); + phydev = phy_scan(mp, pd->phy_addr); - if (IS_ERR(mp->phy)) - err = PTR_ERR(mp->phy); + if (IS_ERR(phydev)) + err = PTR_ERR(phydev); else phy_init(mp, pd->speed, pd->duplex); } @@ -3206,6 +3222,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->priv_flags |= IFF_UNICAST_FLT; dev->gso_max_segs = MV643XX_MAX_TSO_SEGS; + /* MTU range: 64 - 9500 */ + dev->min_mtu = 64; + dev->max_mtu = 9500; + if (mp->shared->win_protect) wrl(mp, WINDOW_PROTECT(mp->port_num), mp->shared->win_protect); @@ -3239,10 +3259,11 @@ out: static int mv643xx_eth_remove(struct platform_device *pdev) { struct mv643xx_eth_private *mp = platform_get_drvdata(pdev); + struct net_device *dev = mp->dev; unregister_netdev(mp->dev); - if (mp->phy != NULL) - phy_disconnect(mp->phy); + if (dev->phydev) + phy_disconnect(dev->phydev); cancel_work_sync(&mp->tx_timeout_task); if (!IS_ERR(mp->clk)) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5cb07c2017bf..b85819ea8eea 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3024,29 +3024,6 @@ static void mvneta_stop_dev(struct mvneta_port *pp) mvneta_rx_reset(pp); } -/* Return positive if MTU is valid */ -static int mvneta_check_mtu_valid(struct net_device *dev, int mtu) -{ - if (mtu < 68) { - netdev_err(dev, "cannot change mtu to less than 68\n"); - return -EINVAL; - } - - /* 9676 == 9700 - 20 and rounding to 8 */ - if (mtu > 9676) { - netdev_info(dev, "Illegal MTU value %d, round to 9676\n", mtu); - mtu = 9676; - } - - if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { - netdev_info(dev, "Illegal MTU value %d, rounding to %d\n", - mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8)); - mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); - } - - return mtu; -} - static void mvneta_percpu_enable(void *arg) { struct mvneta_port *pp = arg; @@ -3067,9 +3044,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) struct mvneta_port *pp = netdev_priv(dev); int ret; - mtu = mvneta_check_mtu_valid(dev, mtu); - if (mtu < 0) - return -EINVAL; + if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { + netdev_info(dev, "Illegal MTU value %d, rounding to %d\n", + mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8)); + mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); + } dev->mtu = mtu; @@ -4154,6 +4133,11 @@ static int mvneta_probe(struct platform_device *pdev) dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; dev->gso_max_segs = MVNETA_MAX_TSO_SEGS; + /* MTU range: 68 - 9676 */ + dev->min_mtu = ETH_MIN_MTU; + /* 9676 == 9700 - 20 and rounding to 8 */ + dev->max_mtu = 9676; + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register\n"); diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 60227a3452a4..c8bf155cad94 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5453,29 +5453,6 @@ static void mvpp2_stop_dev(struct mvpp2_port *port) phy_stop(ndev->phydev); } -/* Return positive if MTU is valid */ -static inline int mvpp2_check_mtu_valid(struct net_device *dev, int mtu) -{ - if (mtu < 68) { - netdev_err(dev, "cannot change mtu to less than 68\n"); - return -EINVAL; - } - - /* 9676 == 9700 - 20 and rounding to 8 */ - if (mtu > 9676) { - netdev_info(dev, "illegal MTU value %d, round to 9676\n", mtu); - mtu = 9676; - } - - if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) { - netdev_info(dev, "illegal MTU value %d, round to %d\n", mtu, - ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8)); - mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); - } - - return mtu; -} - static int mvpp2_check_ringparam_valid(struct net_device *dev, struct ethtool_ringparam *ring) { @@ -5717,10 +5694,10 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu) struct mvpp2_port *port = netdev_priv(dev); int err; - mtu = mvpp2_check_mtu_valid(dev, mtu); - if (mtu < 0) { - err = mtu; - goto error; + if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) { + netdev_info(dev, "illegal MTU value %d, round to %d\n", mtu, + ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8)); + mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); } if (!netif_running(dev)) { @@ -6212,6 +6189,11 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; dev->vlan_features |= features; + /* MTU range: 68 - 9676 */ + dev->min_mtu = ETH_MIN_MTU; + /* 9676 == 9700 - 20 and rounding to 8 */ + dev->max_mtu = 9676; + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 5d5000c8edf1..b78a838f306c 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1209,9 +1209,6 @@ static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) int retval; struct pxa168_eth_private *pep = netdev_priv(dev); - if ((mtu > 9500) || (mtu < 68)) - return -EINVAL; - dev->mtu = mtu; retval = set_port_config_ext(pep); @@ -1459,6 +1456,10 @@ static int pxa168_eth_probe(struct platform_device *pdev) dev->base_addr = 0; dev->ethtool_ops = &pxa168_ethtool_ops; + /* MTU range: 68 - 9500 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 9500; + INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); if (pdev->dev.of_node) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 7173836fe361..9146a514fb33 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -1048,7 +1048,7 @@ static const char *skge_pause(enum pause_status status) static void skge_link_up(struct skge_port *skge) { skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), - LED_BLK_OFF|LED_SYNC_OFF|LED_ON); + LED_BLK_OFF|LED_SYNC_OFF|LED_REG_ON); netif_carrier_on(skge->netdev); netif_wake_queue(skge->netdev); @@ -1062,7 +1062,7 @@ static void skge_link_up(struct skge_port *skge) static void skge_link_down(struct skge_port *skge) { - skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF); + skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_REG_OFF); netif_carrier_off(skge->netdev); netif_stop_queue(skge->netdev); @@ -2668,7 +2668,7 @@ static int skge_down(struct net_device *dev) if (hw->ports == 1) free_irq(hw->pdev->irq, hw); - skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF); + skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_REG_OFF); if (is_genesis(hw)) genesis_stop(skge); else @@ -2900,9 +2900,6 @@ static int skge_change_mtu(struct net_device *dev, int new_mtu) { int err; - if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) - return -EINVAL; - if (!netif_running(dev)) { dev->mtu = new_mtu; return 0; @@ -3857,6 +3854,10 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, dev->watchdog_timeo = TX_WATCHDOG; dev->irq = hw->pdev->irq; + /* MTU range: 60 - 9000 */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = ETH_JUMBO_MTU; + if (highmem) dev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/marvell/skge.h b/drivers/net/ethernet/marvell/skge.h index a2eb34115844..3ea151ff9c43 100644 --- a/drivers/net/ethernet/marvell/skge.h +++ b/drivers/net/ethernet/marvell/skge.h @@ -662,8 +662,8 @@ enum { LED_BLK_OFF = 1<<4, /* Link LED Blinking Off */ LED_SYNC_ON = 1<<3, /* Use Sync Wire to switch LED */ LED_SYNC_OFF = 1<<2, /* Disable Sync Wire Input */ - LED_ON = 1<<1, /* switch LED on */ - LED_OFF = 1<<0, /* switch LED off */ + LED_REG_ON = 1<<1, /* switch LED on */ + LED_REG_OFF = 1<<0, /* switch LED off */ }; /* Receive GMAC FIFO (YUKON) */ diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index f05ea56dcff2..aa60f4dcddd8 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2398,16 +2398,6 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) u16 ctl, mode; u32 imask; - /* MTU size outside the spec */ - if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) - return -EINVAL; - - /* MTU > 1500 on yukon FE and FE+ not allowed */ - if (new_mtu > ETH_DATA_LEN && - (hw->chip_id == CHIP_ID_YUKON_FE || - hw->chip_id == CHIP_ID_YUKON_FE_P)) - return -EINVAL; - if (!netif_running(dev)) { dev->mtu = new_mtu; netdev_update_features(dev); @@ -4808,6 +4798,14 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, dev->features |= dev->hw_features; + /* MTU range: 60 - 1500 or 9000 */ + dev->min_mtu = ETH_ZLEN; + if (hw->chip_id == CHIP_ID_YUKON_FE || + hw->chip_id == CHIP_ID_YUKON_FE_P) + dev->max_mtu = ETH_DATA_LEN; + else + dev->max_mtu = ETH_JUMBO_MTU; + /* try to get mac address in the following order: * 1) from device tree data * 2) from internal registers set by bootloader diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4a62ffd7729d..d71627417ea7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -843,7 +843,7 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: spin_unlock(ð->page_lock); stats->tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -2243,7 +2243,6 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = mtk_do_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, .ndo_fix_features = mtk_fix_features, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index e3be7e44ff51..03f05c4d1f98 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -65,7 +65,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->buf_size = cq->size * mdev->dev->caps.cqe_size; cq->ring = ring; - cq->is_tx = mode; + cq->type = mode; cq->vector = mdev->dev->caps.num_comp_vectors; /* Allocate HW buffers on provided NUMA node. @@ -104,7 +104,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, *cq->mcq.arm_db = 0; memset(cq->buf, 0, cq->buf_size); - if (cq->is_tx == RX) { + if (cq->type == RX) { if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector)) { cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask); @@ -127,25 +127,17 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, /* For TX we use the same irq per ring we assigned for the RX */ struct mlx4_en_cq *rx_cq; - int xdp_index; - - /* The xdp tx irq must align with the rx ring that forwards to - * it, so reindex these from 0. This should only happen when - * tx_ring_num is not a multiple of rx_ring_num. - */ - xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx; - if (xdp_index >= 0) - cq_idx = xdp_index; + cq_idx = cq_idx % priv->rx_ring_num; rx_cq = priv->rx_cq[cq_idx]; cq->vector = rx_cq->vector; } - if (!cq->is_tx) + if (cq->type == RX) cq->size = priv->rx_ring[cq->ring]->actual_size; - if ((cq->is_tx && priv->hwtstamp_config.tx_type) || - (!cq->is_tx && priv->hwtstamp_config.rx_filter)) + if ((cq->type != RX && priv->hwtstamp_config.tx_type) || + (cq->type == RX && priv->hwtstamp_config.rx_filter)) timestamp_en = 1; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, @@ -154,10 +146,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (err) goto free_eq; - cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; + cq->mcq.comp = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) + if (cq->type != RX) netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); else @@ -181,7 +173,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && - cq->is_tx == RX) + cq->type == RX) mlx4_release_eq(priv->mdev->dev, cq->vector); cq->vector = 0; cq->buf_size = 0; @@ -193,7 +185,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { napi_disable(&cq->napi); - if (!cq->is_tx) { + if (cq->type == RX) { napi_hash_del(&cq->napi); synchronize_rcu(); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index bdda17d2ea0f..487a58f9c192 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -49,16 +49,19 @@ static int mlx4_en_moderation_update(struct mlx4_en_priv *priv) { - int i; + int i, t; int err = 0; - for (i = 0; i < priv->tx_ring_num; i++) { - priv->tx_cq[i]->moder_cnt = priv->tx_frames; - priv->tx_cq[i]->moder_time = priv->tx_usecs; - if (priv->port_up) { - err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]); - if (err) - return err; + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + priv->tx_cq[t][i]->moder_cnt = priv->tx_frames; + priv->tx_cq[t][i]->moder_time = priv->tx_usecs; + if (priv->port_up) { + err = mlx4_en_set_cq_moder(priv, + priv->tx_cq[t][i]); + if (err) + return err; + } } } @@ -192,6 +195,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "tx_prio_7_packets", "tx_prio_7_bytes", "tx_novlan_packets", "tx_novlan_bytes", + /* xdp statistics */ + "rx_xdp_drop", + "rx_xdp_tx", + "rx_xdp_tx_full", }; static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { @@ -336,8 +343,8 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return bitmap_iterator_count(&it) + - (priv->tx_ring_num * 2) + - (priv->rx_ring_num * 3); + (priv->tx_ring_num[TX] * 2) + + (priv->rx_ring_num * (3 + NUM_XDP_STATS)); case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -397,14 +404,21 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, if (bitmap_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->pkstats)[i]; - for (i = 0; i < priv->tx_ring_num; i++) { - data[index++] = priv->tx_ring[i]->packets; - data[index++] = priv->tx_ring[i]->bytes; + for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->xdp_stats)[i]; + + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + data[index++] = priv->tx_ring[TX][i]->packets; + data[index++] = priv->tx_ring[TX][i]->bytes; } for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; data[index++] = priv->rx_ring[i]->dropped; + data[index++] = priv->rx_ring[i]->xdp_drop; + data[index++] = priv->rx_ring[i]->xdp_tx; + data[index++] = priv->rx_ring[i]->xdp_tx_full; } spin_unlock_bh(&priv->stats_lock); @@ -467,7 +481,13 @@ static void mlx4_en_get_strings(struct net_device *dev, strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[strings]); - for (i = 0; i < priv->tx_ring_num; i++) { + for (i = 0; i < NUM_XDP_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + + for (i = 0; i < priv->tx_ring_num[TX]; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, @@ -480,6 +500,12 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_bytes", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_dropped", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_drop", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_tx", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_xdp_tx_full", i); } break; case ETH_SS_PRIV_FLAGS: @@ -1060,7 +1086,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev, if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && - tx_size == priv->tx_ring[0]->size) + tx_size == priv->tx_ring[TX][0]->size) return 0; tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); @@ -1105,7 +1131,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev, param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; param->rx_pending = priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size; - param->tx_pending = priv->tx_ring[0]->size; + param->tx_pending = priv->tx_ring[TX][0]->size; } static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) @@ -1710,7 +1736,7 @@ static void mlx4_en_get_channels(struct net_device *dev, channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; channel->rx_count = priv->rx_ring_num; - channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP; + channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP; } static int mlx4_en_set_channels(struct net_device *dev, @@ -1721,6 +1747,7 @@ static int mlx4_en_set_channels(struct net_device *dev, struct mlx4_en_port_profile new_prof; struct mlx4_en_priv *tmp; int port_up = 0; + int xdp_count; int err = 0; if (channel->other_count || channel->combined_count || @@ -1729,20 +1756,25 @@ static int mlx4_en_set_channels(struct net_device *dev, !channel->tx_count || !channel->rx_count) return -EINVAL; - if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) { - en_err(priv, "Minimum %d tx channels required with XDP on\n", - priv->xdp_ring_num / MLX4_EN_NUM_UP + 1); - return -EINVAL; - } - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) return -ENOMEM; mutex_lock(&mdev->state_lock); + xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; + if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) { + err = -EINVAL; + en_err(priv, + "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", + channel->tx_count * MLX4_EN_NUM_UP + xdp_count, + MAX_TX_RINGS); + goto out; + } + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); new_prof.num_tx_rings_p_up = channel->tx_count; - new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.tx_ring_num[TX_XDP] = xdp_count; new_prof.rx_ring_num = channel->rx_count; err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); @@ -1756,14 +1788,13 @@ static int mlx4_en_set_channels(struct net_device *dev, mlx4_en_safe_replace_resources(priv, tmp); - netif_set_real_num_tx_queues(dev, priv->tx_ring_num - - priv->xdp_ring_num); + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); if (dev->num_tc) mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); - en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num); + en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); if (port_up) { @@ -1774,8 +1805,8 @@ static int mlx4_en_set_channels(struct net_device *dev, err = mlx4_en_moderation_update(priv); out: - kfree(tmp); mutex_unlock(&mdev->state_lock); + kfree(tmp); return err; } @@ -1823,11 +1854,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) int ret = 0; if (bf_enabled_new != bf_enabled_old) { + int t; + if (bf_enabled_new) { bool bf_supported = true; - for (i = 0; i < priv->tx_ring_num; i++) - bf_supported &= priv->tx_ring[i]->bf_alloced; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + bf_supported &= + priv->tx_ring[t][i]->bf_alloced; if (!bf_supported) { en_err(priv, "BlueFlame is not supported\n"); @@ -1839,8 +1874,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; } - for (i = 0; i < priv->tx_ring_num; i++) - priv->tx_ring[i]->bf_enabled = bf_enabled_new; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + priv->tx_ring[t][i]->bf_enabled = + bf_enabled_new; en_info(priv, "BlueFlame %s\n", bf_enabled_new ? "Enabled" : "Disabled"); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index bf7628db098a..36a7a54bbb82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; - params->prof[i].tx_ring_num = params->num_tx_rings_p_up * + params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * MLX4_EN_NUM_UP; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3a47e83d3e07..9a807e93c9fd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1214,8 +1214,8 @@ static void mlx4_en_netpoll(struct net_device *dev) struct mlx4_en_cq *cq; int i; - for (i = 0; i < priv->tx_ring_num; i++) { - cq = priv->tx_cq[i]; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + cq = priv->tx_cq[TX][i]; napi_schedule(&cq->napi); } } @@ -1299,12 +1299,14 @@ static void mlx4_en_tx_timeout(struct net_device *dev) if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num; i++) { + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; + if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) continue; en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, priv->tx_ring[i]->qpn, priv->tx_ring[i]->cqn, - priv->tx_ring[i]->cons, priv->tx_ring[i]->prod); + i, tx_ring->qpn, tx_ring->cqn, + tx_ring->cons, tx_ring->prod); } priv->port_stats.tx_timeout++; @@ -1328,7 +1330,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; - int i; + int i, t; /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: @@ -1353,10 +1355,12 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) priv->last_moder_bytes[i] = 0; } - for (i = 0; i < priv->tx_ring_num; i++) { - cq = priv->tx_cq[i]; - cq->moder_cnt = priv->tx_frames; - cq->moder_time = priv->tx_usecs; + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + cq = priv->tx_cq[t][i]; + cq->moder_cnt = priv->tx_frames; + cq->moder_time = priv->tx_usecs; + } } /* Reset auto-moderation params */ @@ -1526,19 +1530,13 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, int tx_ring_idx) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx]; - int rr_index; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX_XDP][tx_ring_idx]; + int rr_index = tx_ring_idx; - rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx; - if (rr_index >= 0) { - tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; - tx_ring->recycle_ring = priv->rx_ring[rr_index]; - en_dbg(DRV, priv, - "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n", - tx_ring_idx, rr_index); - } else { - tx_ring->recycle_ring = NULL; - } + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, "Set tx_ring[%d][%d]->recycle_ring = rx_ring[%d]\n", + TX_XDP, tx_ring_idx, rr_index); } int mlx4_en_start_port(struct net_device *dev) @@ -1548,9 +1546,8 @@ int mlx4_en_start_port(struct net_device *dev) struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; int rx_index = 0; - int tx_index = 0; int err = 0; - int i; + int i, t; int j; u8 mc_list[16] = {0}; @@ -1635,43 +1632,51 @@ int mlx4_en_start_port(struct net_device *dev) goto rss_err; /* Configure tx cq's and rings */ - for (i = 0; i < priv->tx_ring_num; i++) { - /* Configure cq */ - cq = priv->tx_cq[i]; - err = mlx4_en_activate_cq(priv, cq, i); - if (err) { - en_err(priv, "Failed allocating Tx CQ\n"); - goto tx_err; - } - err = mlx4_en_set_cq_moder(priv, cq); - if (err) { - en_err(priv, "Failed setting cq moderation parameters\n"); - mlx4_en_deactivate_cq(priv, cq); - goto tx_err; - } - en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); - cq->buf->wqe_index = cpu_to_be16(0xffff); - - /* Configure ring */ - tx_ring = priv->tx_ring[i]; - err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, - i / priv->num_tx_rings_p_up); - if (err) { - en_err(priv, "Failed allocating Tx ring\n"); - mlx4_en_deactivate_cq(priv, cq); - goto tx_err; - } - tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { + u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1; - mlx4_en_init_recycle_ring(priv, i); + for (i = 0; i < priv->tx_ring_num[t]; i++) { + /* Configure cq */ + cq = priv->tx_cq[t][i]; + err = mlx4_en_activate_cq(priv, cq, i); + if (err) { + en_err(priv, "Failed allocating Tx CQ\n"); + goto tx_err; + } + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + en_err(priv, "Failed setting cq moderation parameters\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + en_dbg(DRV, priv, + "Resetting index of collapsed CQ:%d to -1\n", i); + cq->buf->wqe_index = cpu_to_be16(0xffff); + + /* Configure ring */ + tx_ring = priv->tx_ring[t][i]; + err = mlx4_en_activate_tx_ring(priv, tx_ring, + cq->mcq.cqn, + i / num_tx_rings_p_up); + if (err) { + en_err(priv, "Failed allocating Tx ring\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + if (t != TX_XDP) { + tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + tx_ring->recycle_ring = NULL; + } else { + mlx4_en_init_recycle_ring(priv, i); + } - /* Arm CQ for TX completions */ - mlx4_en_arm_cq(priv, cq); + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); - /* Set initial ownership of all Tx TXBBs to SW (1) */ - for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) - *((u32 *) (tx_ring->buf + j)) = 0xffffffff; - ++tx_index; + /* Set initial ownership of all Tx TXBBs to SW (1) */ + for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) + *((u32 *)(tx_ring->buf + j)) = 0xffffffff; + } } /* Configure port */ @@ -1746,9 +1751,18 @@ int mlx4_en_start_port(struct net_device *dev) return 0; tx_err: - while (tx_index--) { - mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); - mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); + if (t == MLX4_EN_NUM_TX_TYPES) { + t--; + i = priv->tx_ring_num[t]; + } + while (t >= 0) { + while (i--) { + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]); + } + if (!t--) + break; + i = priv->tx_ring_num[t]; } mlx4_en_destroy_drop_qp(priv); rss_err: @@ -1773,7 +1787,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_mc_list *mclist, *tmp; struct ethtool_flow_id *flow, *tmp_flow; - int i; + int i, t; u8 mc_list[16] = {0}; if (!priv->port_up) { @@ -1859,14 +1873,17 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ - for (i = 0; i < priv->tx_ring_num; i++) { - mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); - mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]); + } } msleep(10); - for (i = 0; i < priv->tx_ring_num; i++) - mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) + for (i = 0; i < priv->tx_ring_num[t]; i++) + mlx4_en_free_tx_buf(dev, priv->tx_ring[t][i]); if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) mlx4_en_delete_rss_steer_rules(priv); @@ -1915,6 +1932,7 @@ static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring **tx_ring; int i; if (!mlx4_is_slave(mdev->dev)) @@ -1932,15 +1950,16 @@ static void mlx4_en_clear_stats(struct net_device *dev) sizeof(priv->tx_priority_flowstats)); memset(&priv->pf_stats, 0, sizeof(priv->pf_stats)); - for (i = 0; i < priv->tx_ring_num; i++) { - priv->tx_ring[i]->bytes = 0; - priv->tx_ring[i]->packets = 0; - priv->tx_ring[i]->tx_csum = 0; - priv->tx_ring[i]->tx_dropped = 0; - priv->tx_ring[i]->queue_stopped = 0; - priv->tx_ring[i]->wake_queue = 0; - priv->tx_ring[i]->tso_packets = 0; - priv->tx_ring[i]->xmit_more = 0; + tx_ring = priv->tx_ring[TX]; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + tx_ring[i]->bytes = 0; + tx_ring[i]->packets = 0; + tx_ring[i]->tx_csum = 0; + tx_ring[i]->tx_dropped = 0; + tx_ring[i]->queue_stopped = 0; + tx_ring[i]->wake_queue = 0; + tx_ring[i]->tso_packets = 0; + tx_ring[i]->xmit_more = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; @@ -1996,17 +2015,20 @@ static int mlx4_en_close(struct net_device *dev) static void mlx4_en_free_resources(struct mlx4_en_priv *priv) { - int i; + int i, t; #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = NULL; #endif - for (i = 0; i < priv->tx_ring_num; i++) { - if (priv->tx_ring && priv->tx_ring[i]) - mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); - if (priv->tx_cq && priv->tx_cq[i]) - mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + if (priv->tx_ring[t] && priv->tx_ring[t][i]) + mlx4_en_destroy_tx_ring(priv, + &priv->tx_ring[t][i]); + if (priv->tx_cq[t] && priv->tx_cq[t][i]) + mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); + } } for (i = 0; i < priv->rx_ring_num; i++) { @@ -2022,20 +2044,22 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv) static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; - int i; + int i, t; int node; /* Create tx Rings */ - for (i = 0; i < priv->tx_ring_num; i++) { - node = cpu_to_node(i % num_online_cpus()); - if (mlx4_en_create_cq(priv, &priv->tx_cq[i], - prof->tx_ring_size, i, TX, node)) - goto err; - - if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], - prof->tx_ring_size, TXBB_SIZE, - node, i)) - goto err; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + node = cpu_to_node(i % num_online_cpus()); + if (mlx4_en_create_cq(priv, &priv->tx_cq[t][i], + prof->tx_ring_size, i, t, node)) + goto err; + + if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[t][i], + prof->tx_ring_size, + TXBB_SIZE, node, i)) + goto err; + } } /* Create rx Rings */ @@ -2067,11 +2091,14 @@ err: if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - for (i = 0; i < priv->tx_ring_num; i++) { - if (priv->tx_ring[i]) - mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); - if (priv->tx_cq[i]) - mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + for (i = 0; i < priv->tx_ring_num[t]; i++) { + if (priv->tx_ring[t][i]) + mlx4_en_destroy_tx_ring(priv, + &priv->tx_ring[t][i]); + if (priv->tx_cq[t][i]) + mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); + } } return -ENOMEM; } @@ -2088,10 +2115,11 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, struct mlx4_en_priv *src, struct mlx4_en_port_profile *prof) { + int t; + memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, sizeof(dst->hwtstamp_config)); dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up; - dst->tx_ring_num = prof->tx_ring_num; dst->rx_ring_num = prof->rx_ring_num; dst->flags = prof->flags; dst->mdev = src->mdev; @@ -2101,33 +2129,50 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); - dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, - GFP_KERNEL); - if (!dst->tx_ring) - return -ENOMEM; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + dst->tx_ring_num[t] = prof->tx_ring_num[t]; + if (!dst->tx_ring_num[t]) + continue; - dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, - GFP_KERNEL); - if (!dst->tx_cq) { - kfree(dst->tx_ring); - return -ENOMEM; + dst->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) * + MAX_TX_RINGS, GFP_KERNEL); + if (!dst->tx_ring[t]) + goto err_free_tx; + + dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * + MAX_TX_RINGS, GFP_KERNEL); + if (!dst->tx_cq[t]) { + kfree(dst->tx_ring[t]); + goto err_free_tx; + } } + return 0; + +err_free_tx: + while (t--) { + kfree(dst->tx_ring[t]); + kfree(dst->tx_cq[t]); + } + return -ENOMEM; } static void mlx4_en_update_priv(struct mlx4_en_priv *dst, struct mlx4_en_priv *src) { + int t; memcpy(dst->rx_ring, src->rx_ring, sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num); memcpy(dst->rx_cq, src->rx_cq, sizeof(struct mlx4_en_cq *) * src->rx_ring_num); memcpy(&dst->hwtstamp_config, &src->hwtstamp_config, sizeof(dst->hwtstamp_config)); - dst->tx_ring_num = src->tx_ring_num; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + dst->tx_ring_num[t] = src->tx_ring_num[t]; + dst->tx_ring[t] = src->tx_ring[t]; + dst->tx_cq[t] = src->tx_cq[t]; + } dst->rx_ring_num = src->rx_ring_num; - dst->tx_ring = src->tx_ring; - dst->tx_cq = src->tx_cq; memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); } @@ -2135,14 +2180,18 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp, struct mlx4_en_port_profile *prof) { + int t; + mlx4_en_copy_priv(tmp, priv, prof); if (mlx4_en_alloc_resources(tmp)) { en_warn(priv, "%s: Resource allocation failed, using previous configuration\n", __func__); - kfree(tmp->tx_ring); - kfree(tmp->tx_cq); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + kfree(tmp->tx_ring[t]); + kfree(tmp->tx_cq[t]); + } return -ENOMEM; } return 0; @@ -2161,6 +2210,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) struct mlx4_en_dev *mdev = priv->mdev; bool shutdown = mdev->dev->persist->interface_state & MLX4_INTERFACE_STATE_SHUTDOWN; + int t; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2197,8 +2247,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mlx4_en_free_resources(priv); - kfree(priv->tx_ring); - kfree(priv->tx_cq); + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + kfree(priv->tx_ring[t]); + kfree(priv->tx_cq[t]); + } if (!shutdown) free_netdev(dev); @@ -2213,11 +2265,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n", dev->mtu, new_mtu); - if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { - en_err(priv, "Bad MTU size:%d.\n", new_mtu); - return -EPERM; - } - if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { + if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { en_err(priv, "MTU size:%d requires frags but XDP running\n", new_mtu); return -EOPNOTSUPP; @@ -2608,7 +2656,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[queue_index]; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][queue_index]; struct mlx4_update_qp_params params; int err; @@ -2636,18 +2684,21 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; struct bpf_prog *old_prog; + struct mlx4_en_priv *tmp; + int tx_changed = 0; int xdp_ring_num; int port_up = 0; int err; int i; - xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0; + xdp_ring_num = prog ? priv->rx_ring_num : 0; /* No need to reconfigure buffers when simply swapping the * program for a new one. */ - if (priv->xdp_ring_num == xdp_ring_num) { + if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) { if (prog) { prog = bpf_prog_add(prog, priv->rx_ring_num - 1); if (IS_ERR(prog)) @@ -2671,28 +2722,44 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) return -EOPNOTSUPP; } - if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) { - en_err(priv, - "Minimum %d tx channels required to run XDP\n", - (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP); - return -EINVAL; - } + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; if (prog) { prog = bpf_prog_add(prog, priv->rx_ring_num - 1); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out; + } } mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.tx_ring_num[TX_XDP] = xdp_ring_num; + + if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) { + tx_changed = 1; + new_prof.tx_ring_num[TX] = + MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP); + en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); + } + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) { + if (prog) + bpf_prog_sub(prog, priv->rx_ring_num - 1); + goto unlock_out; + } + if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - priv->xdp_ring_num = xdp_ring_num; - netif_set_real_num_tx_queues(dev, priv->tx_ring_num - - priv->xdp_ring_num); + mlx4_en_safe_replace_resources(priv, tmp); + if (tx_changed) + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); for (i = 0; i < priv->rx_ring_num; i++) { old_prog = rcu_dereference_protected( @@ -2712,15 +2779,18 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) } } +unlock_out: mutex_unlock(&mdev->state_lock); - return 0; +out: + kfree(tmp); + return err; } static bool mlx4_xdp_attached(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - return !!priv->xdp_ring_num; + return !!priv->tx_ring_num[TX_XDP]; } static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3057,6 +3127,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, if (!mlx4_is_slave(dev)) bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS); + last_i += NUM_PKT_STATS; + + bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS); + last_i += NUM_XDP_STATS; } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, @@ -3064,7 +3138,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, { struct net_device *dev; struct mlx4_en_priv *priv; - int i; + int i, t; int err; dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), @@ -3072,7 +3146,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (dev == NULL) return -ENOMEM; - netif_set_real_num_tx_queues(dev, prof->tx_ring_num); + netif_set_real_num_tx_queues(dev, prof->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); @@ -3109,21 +3183,27 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; - priv->tx_ring_num = prof->tx_ring_num; priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); - priv->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, - GFP_KERNEL); - if (!priv->tx_ring) { - err = -ENOMEM; - goto out; - } - priv->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, - GFP_KERNEL); - if (!priv->tx_cq) { - err = -ENOMEM; - goto out; + for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { + priv->tx_ring_num[t] = prof->tx_ring_num[t]; + if (!priv->tx_ring_num[t]) + continue; + + priv->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) * + MAX_TX_RINGS, GFP_KERNEL); + if (!priv->tx_ring[t]) { + err = -ENOMEM; + goto err_free_tx; + } + priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * + MAX_TX_RINGS, GFP_KERNEL); + if (!priv->tx_cq[t]) { + kfree(priv->tx_ring[t]); + err = -ENOMEM; + goto out; + } } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0; @@ -3206,7 +3286,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, else dev->netdev_ops = &mlx4_netdev_ops; dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT; - netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); dev->ethtool_ops = &mlx4_en_ethtool_ops; @@ -3296,13 +3376,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + /* MTU range: 46 - hw-specific max */ + dev->min_mtu = MLX4_EN_MIN_MTU; + dev->max_mtu = priv->max_mtu; + mdev->pndev[port] = dev; mdev->upper[port] = NULL; netif_carrier_off(dev); mlx4_en_set_default_moderation(priv); - en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num[TX]); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); mlx4_en_update_loopback_state(priv->dev, priv->dev->features); @@ -3362,6 +3446,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, return 0; +err_free_tx: + while (t--) { + kfree(priv->tx_ring[t]); + kfree(priv->tx_cq[t]); + } out: mlx4_en_destroy_netdev(dev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 59473a0ebcdf..1eb4c1e10bad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -179,6 +179,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; priv->port_stats.rx_chksum_complete = 0; + priv->xdp_stats.rx_xdp_drop = 0; + priv->xdp_stats.rx_xdp_tx = 0; + priv->xdp_stats.rx_xdp_tx_full = 0; for (i = 0; i < priv->rx_ring_num; i++) { stats->rx_packets += priv->rx_ring[i]->packets; stats->rx_bytes += priv->rx_ring[i]->bytes; @@ -186,6 +189,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete; + priv->xdp_stats.rx_xdp_drop += priv->rx_ring[i]->xdp_drop; + priv->xdp_stats.rx_xdp_tx += priv->rx_ring[i]->xdp_tx; + priv->xdp_stats.rx_xdp_tx_full += priv->rx_ring[i]->xdp_tx_full; } stats->tx_packets = 0; stats->tx_bytes = 0; @@ -196,8 +202,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.tso_packets = 0; priv->port_stats.xmit_more = 0; - for (i = 0; i < priv->tx_ring_num; i++) { - const struct mlx4_en_tx_ring *ring = priv->tx_ring[i]; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { + const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i]; stats->tx_packets += ring->packets; stats->tx_bytes += ring->bytes; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index f2e8beddcf44..2cc91002064f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -788,7 +788,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct bpf_prog *xdp_prog; int doorbell_pending; struct sk_buff *skb; - int tx_index; int index; int nr; unsigned int length; @@ -808,7 +807,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud rcu_read_lock(); xdp_prog = rcu_dereference(ring->xdp_prog); doorbell_pending = 0; - tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of @@ -877,8 +875,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; - ring->bytes += length; - ring->packets++; l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); @@ -904,22 +900,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (likely(!mlx4_en_xmit_frame(frags, dev, - length, tx_index, + if (likely(!mlx4_en_xmit_frame(ring, frags, dev, + length, cq->ring, &doorbell_pending))) goto consumed; - goto xdp_drop; /* Drop on xmit failure */ + goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: case XDP_DROP: -xdp_drop: + ring->xdp_drop++; +xdp_drop_no_cnt: if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } } + ring->bytes += length; + ring->packets++; + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { @@ -1082,7 +1082,7 @@ consumed: out: rcu_read_unlock(); if (doorbell_pending) - mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]); AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); @@ -1162,7 +1162,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) /* bpf requires buffers to be set up as 1 packet per page. * This only works when num_frags == 1. */ - if (priv->xdp_ring_num) { + if (priv->tx_ring_num[TX_XDP]) { dma_dir = PCI_DMA_BIDIRECTIONAL; /* This will gain efficient xdp frame recycling at the expense * of more costly truesize accounting diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index e2509bba3e7c..5de3cbe24f2b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -392,7 +392,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) cnt++; } - netdev_tx_reset_queue(ring->tx_queue); + if (ring->tx_queue) + netdev_tx_reset_queue(ring->tx_queue); if (cnt) en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); @@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; - struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; + struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index, stamp_index; @@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool bf_ok; tx_ind = skb_get_queue_mapping(skb); - ring = priv->tx_ring[tx_ind]; + ring = priv->tx_ring[TX][tx_ind]; if (!priv->port_up) goto tx_drop; @@ -1078,7 +1079,8 @@ tx_drop: return NETDEV_TX_OK; } -netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, + struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, int tx_ind, int *doorbell_pending) { @@ -1101,7 +1103,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, "mlx4_en_xmit_frame requires minimum size tx desc"); - ring = priv->tx_ring[tx_ind]; + ring = priv->tx_ring[TX_XDP][tx_ind]; if (!priv->port_up) goto tx_drop; @@ -1153,8 +1155,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, ((ring->prod & ring->size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); - ring->packets++; - ring->bytes += tx_info->nr_bytes; + rx_ring->xdp_tx++; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); ring->prod += nr_txbb; @@ -1178,7 +1179,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, return NETDEV_TX_OK; tx_drop_count: - ring->tx_dropped++; + rx_ring->xdp_tx_full++; tx_drop: return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a3528dd1e72e..eff21651b673 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -207,8 +207,11 @@ enum { */ enum cq_type { - RX = 0, - TX = 1, + /* keep tx types first */ + TX, + TX_XDP, +#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1) + RX, }; @@ -347,6 +350,9 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; + unsigned long xdp_drop; + unsigned long xdp_tx; + unsigned long xdp_tx_full; unsigned long dropped; int hwtstamp_rx_filter; cpumask_var_t affinity_mask; @@ -361,7 +367,7 @@ struct mlx4_en_cq { int size; int buf_size; int vector; - enum cq_type is_tx; + enum cq_type type; u16 moder_time; u16 moder_cnt; struct mlx4_cqe *buf; @@ -372,7 +378,7 @@ struct mlx4_en_cq { struct mlx4_en_port_profile { u32 flags; - u32 tx_ring_num; + u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES]; u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; @@ -569,17 +575,16 @@ struct mlx4_en_priv { u32 flags; u8 num_tx_rings_p_up; u32 tx_work_limit; - u32 tx_ring_num; + u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES]; u32 rx_ring_num; u32 rx_skb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 num_frags; u16 log_rx_info; - int xdp_ring_num; - struct mlx4_en_tx_ring **tx_ring; + struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; - struct mlx4_en_cq **tx_cq; + struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; @@ -597,6 +602,7 @@ struct mlx4_en_priv { struct mlx4_en_flow_stats_rx rx_flowstats; struct mlx4_en_flow_stats_tx tx_flowstats; struct mlx4_en_port_stats port_stats; + struct mlx4_en_xdp_stats xdp_stats; struct mlx4_en_stats_bitmap stats_bitmap; struct list_head mc_list; struct list_head curr_list; @@ -685,7 +691,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); -netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, + struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, int tx_ind, int *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 7fd466c0b929..48641cb0367f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -55,6 +55,13 @@ struct mlx4_en_perf_stats { #define NUM_PERF_COUNTERS 6 }; +struct mlx4_en_xdp_stats { + unsigned long rx_xdp_drop; + unsigned long rx_xdp_tx; + unsigned long rx_xdp_tx_full; +#define NUM_XDP_STATS 3 +}; + #define NUM_MAIN_STATS 21 #define MLX4_NUM_PRIORITIES 8 @@ -107,7 +114,8 @@ enum { }; #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ - NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS) + NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \ + NUM_XDP_STATS) #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ sizeof(((struct net_device_stats *)0)->n)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1e639f886021..8561102f2563 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -318,6 +318,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -419,11 +421,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -580,6 +585,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7a43502a89cc..ac09767b6984 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -524,7 +524,7 @@ struct mlx5e_vxlan_db { struct mlx5e_l2_rule { u8 addr[ETH_ALEN + 2]; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; }; struct mlx5e_flow_table { @@ -545,10 +545,10 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; - struct mlx5_flow_rule *untagged_rule; - struct mlx5_flow_rule *any_vlan_rule; - bool filter_disabled; + struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_vlan_rule; + bool filter_disabled; }; struct mlx5e_l2_table { @@ -566,14 +566,14 @@ struct mlx5e_l2_table { /* L3/L4 traffic type classifier */ struct mlx5e_ttc_table { struct mlx5e_flow_table ft; - struct mlx5_flow_rule *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) struct arfs_table { struct mlx5e_flow_table ft; - struct mlx5_flow_rule *default_rule; + struct mlx5_flow_handle *default_rule; struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; @@ -893,5 +893,9 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti); +void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index a8cb38789774..68419a01db36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -56,7 +56,7 @@ struct arfs_tuple { struct arfs_rule { struct mlx5e_priv *priv; struct work_struct arfs_work; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; struct hlist_node hlist; int rxq; /* Flow ID passed to ndo_rx_flow_steer */ @@ -104,7 +104,7 @@ static int arfs_disable(struct mlx5e_priv *priv) tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], - &dest); + &dest, NULL); if (err) { netdev_err(priv->netdev, "%s: modify ttc destination failed\n", @@ -137,7 +137,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) tt = arfs_get_tt(i); /* Modify ttc rules destination to point on the aRFS FTs */ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], - &dest); + &dest, NULL); if (err) { netdev_err(priv->netdev, "%s: modify ttc destination failed err=%d\n", @@ -151,7 +151,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) static void arfs_destroy_table(struct arfs_table *arfs_t) { - mlx5_del_flow_rule(arfs_t->default_rule); + mlx5_del_flow_rules(arfs_t->default_rule); mlx5e_destroy_flow_table(&arfs_t->ft); } @@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_spec *spec; @@ -205,10 +210,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, goto out; } - arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + &flow_act, + &dest, 1); if (IS_ERR(arfs_t->default_rule)) { err = PTR_ERR(arfs_t->default_rule); arfs_t->default_rule = NULL; @@ -324,7 +328,7 @@ static int arfs_create_table(struct mlx5e_priv *priv, int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -396,7 +400,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) spin_unlock_bh(&priv->fs.arfs.arfs_lock); hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { if (arfs_rule->rule) - mlx5_del_flow_rule(arfs_rule->rule); + mlx5_del_flow_rules(arfs_rule->rule); hlist_del(&arfs_rule->hlist); kfree(arfs_rule); } @@ -420,7 +424,7 @@ static void arfs_del_rules(struct mlx5e_priv *priv) hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { cancel_work_sync(&rule->arfs_work); if (rule->rule) - mlx5_del_flow_rule(rule->rule); + mlx5_del_flow_rules(rule->rule); hlist_del(&rule->hlist); kfree(rule); } @@ -462,12 +466,17 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, return NULL; } -static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, - struct arfs_rule *arfs_rule) +static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; - struct mlx5_flow_rule *rule = NULL; + struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest; struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; @@ -544,9 +553,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", @@ -559,14 +566,14 @@ out: } static void arfs_modify_rule_rq(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule, u16 rxq) + struct mlx5_flow_handle *rule, u16 rxq) { struct mlx5_flow_destination dst; int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst.tir_num = priv->direct_tir[rxq].tirn; - err = mlx5_modify_rule_destination(rule, &dst); + err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, "Failed to modfiy aRFS rule destination to rq=%d\n", rxq); @@ -578,7 +585,7 @@ static void arfs_handle_work(struct work_struct *work) struct arfs_rule, arfs_work); struct mlx5e_priv *priv = arfs_rule->priv; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 36fbc6b21a33..1fe80de5d68f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -158,9 +158,14 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; - struct mlx5_flow_rule **rule_p; + struct mlx5_flow_handle **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } - *rule_p = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); @@ -229,20 +231,20 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: if (priv->fs.vlan.untagged_rule) { - mlx5_del_flow_rule(priv->fs.vlan.untagged_rule); + mlx5_del_flow_rules(priv->fs.vlan.untagged_rule); priv->fs.vlan.untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: if (priv->fs.vlan.any_vlan_rule) { - mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule); + mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule); priv->fs.vlan.any_vlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5e_vport_context_update_vlans(priv); if (priv->fs.vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]); + mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]); priv->fs.vlan.active_vlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); @@ -560,7 +562,7 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) for (i = 0; i < MLX5E_NUM_TT; i++) { if (!IS_ERR_OR_NULL(ttc->rules[i])) { - mlx5_del_flow_rule(ttc->rules[i]); + mlx5_del_flow_rules(ttc->rules[i]); ttc->rules[i] = NULL; } } @@ -616,13 +618,19 @@ static struct { }, }; -static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, - u8 proto) +static struct mlx5_flow_handle * +mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) { - struct mlx5_flow_rule *rule; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; + struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -643,10 +651,7 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - dest); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); @@ -660,7 +665,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; struct mlx5e_ttc_table *ttc; - struct mlx5_flow_rule **rules; + struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; int tt; int err; @@ -776,7 +781,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -801,7 +806,7 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai) { if (!IS_ERR_OR_NULL(ai->rule)) { - mlx5_del_flow_rule(ai->rule); + mlx5_del_flow_rules(ai->rule); ai->rule = NULL; } } @@ -809,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; @@ -847,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); @@ -947,7 +955,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1037,7 +1045,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d17c24227900..3691451c728c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -36,7 +36,7 @@ struct mlx5e_ethtool_rule { struct list_head list; struct ethtool_rx_flow_spec flow_spec; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; struct mlx5e_ethtool_table *eth_ft; }; @@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, MLX5E_ETHTOOL_NUM_ENTRIES); ft = mlx5_create_auto_grouped_flow_table(ns, prio, table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0); + MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); if (IS_ERR(ft)) return (void *)ft; @@ -284,15 +284,16 @@ static bool outer_header_zero(u32 *match_criteria) size - 1); } -static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct ethtool_rx_flow_spec *fs) +static struct mlx5_flow_handle * +add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) { struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; int err = 0; - u32 action; spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) @@ -303,7 +304,7 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, goto free; if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { @@ -313,12 +314,12 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - rule = mlx5_add_flow_rule(ft, spec, action, - MLX5_FS_DEFAULT_FLOW_TAG, dst); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", @@ -335,7 +336,7 @@ static void del_ethtool_rule(struct mlx5e_priv *priv, struct mlx5e_ethtool_rule *eth_rule) { if (eth_rule->rule) - mlx5_del_flow_rule(eth_rule->rule); + mlx5_del_flow_rules(eth_rule->rule); list_del(ð_rule->list); priv->fs.ethtool.tot_num_rules--; put_flow_table(eth_rule->eth_ft); @@ -475,7 +476,7 @@ int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, { struct mlx5e_ethtool_table *eth_ft; struct mlx5e_ethtool_rule *eth_rule; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; int num_tuples; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 84e8b250e2af..52b4fd53b1a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2850,31 +2850,13 @@ static int mlx5e_set_features(struct net_device *netdev, return err ? -EINVAL : 0; } -#define MXL5_HW_MIN_MTU 64 -#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN) - static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; bool was_opened; - u16 max_mtu; - u16 min_mtu; int err = 0; bool reset; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - - max_mtu = MLX5E_HW2SW_MTU(max_mtu); - min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU); - - if (new_mtu > max_mtu || new_mtu < min_mtu) { - netdev_err(netdev, - "%s: Bad MTU (%d), valid range is: [%d..%d]\n", - __func__, new_mtu, min_mtu, max_mtu); - return -EINVAL; - } - mutex_lock(&priv->state_lock); reset = !priv->params.lro_en && @@ -2944,6 +2926,20 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); } + +static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (min_tx_rate) + return -EOPNOTSUPP; + + return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, + max_tx_rate); +} + static int mlx5_vport_link2ifla(u8 esw_link) { switch (esw_link) { @@ -3000,8 +2996,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -static void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3014,8 +3010,8 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } -static void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3251,6 +3247,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, .ndo_set_vf_trust = mlx5e_set_vf_trust, + .ndo_set_vf_rate = mlx5e_set_vf_rate, .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, @@ -3849,6 +3846,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { const struct mlx5e_profile *profile; struct mlx5e_priv *priv; + u16 max_mtu; int err; priv = netdev_priv(netdev); @@ -3879,6 +3877,11 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) mlx5e_init_l2_addr(priv); + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + mlx5e_set_dev_port_mtu(netdev); if (profile->enable) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index bf1c09ca73c0..a84825d59f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -256,6 +256,8 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_get_stats, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, }; static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, @@ -328,7 +330,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_eswitch_rep *rep = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; int err; int i; @@ -360,7 +362,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return 0; err_del_flow_rule: - mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5_del_flow_rules(rep->vport_rx_rule); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: @@ -375,7 +377,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) int i; mlx5e_tc_cleanup(priv); - mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5_del_flow_rules(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); for (i = 0; i < priv->params.num_channels; i++) mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6bb21b31cfeb..4b991124bc57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -40,28 +40,43 @@ #include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> +#include <net/vxlan.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include "vxlan.h" struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; + struct list_head encap; /* flows sharing the same encap */ struct mlx5_esw_flow_attr *attr; }; +enum { + MLX5_HEADER_TYPE_VXLAN = 0x0, + MLX5_HEADER_TYPE_NVGRE = 0x1, +}; + #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) +static struct mlx5_flow_handle * +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_act flow_act = { + .action = action, + .flow_tag = flow_tag, + .encap_id = 0, + }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; bool table_created = false; if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { @@ -82,7 +97,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_PRIO, MLX5E_TC_TABLE_NUM_ENTRIES, MLX5E_TC_TABLE_NUM_GROUPS, - 0); + 0, 0); if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); @@ -94,9 +109,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rule(priv->fs.tc.t, spec, - action, flow_tag, - &dest); + rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -114,9 +127,10 @@ err_create_ft: return rule; } -static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) +static struct mlx5_flow_handle * +mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -129,7 +143,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule, + struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -140,7 +154,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, if (esw && esw->mode == SRIOV_OFFLOADS) mlx5_eswitch_del_vlan_action(esw, attr); - mlx5_del_flow_rule(rule); + mlx5_del_flow_rules(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -150,6 +164,121 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static void parse_vxlan_attr(struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(key->keyid)); + } +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + + /* Full udp dst port must be given */ + if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) + return -EOPNOTSUPP; + + /* udp src port isn't supported */ + if (memchr_inv(&mask->src, 0, sizeof(mask->src))) + return -EOPNOTSUPP; + + if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + parse_vxlan_attr(spec, f); + else + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(key->dst)); + + } else { /* udp dst port must be given */ + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->mask); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(key->dst)); + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; +} + static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { @@ -167,12 +296,44 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) { + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) { netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", f->dissector->used_keys); return -EOPNOTSUPP; } + if ((dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + switch (key->addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + if (parse_tunnel_attr(priv, spec, f)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + /* In decap flow, header pointers should point to the inner + * headers, outer header were already set by parse_tunnel_attr + */ + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = skb_flow_dissector_target(f->dissector, @@ -387,11 +548,243 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static inline int cmp_encap_info(struct mlx5_encap_info *a, + struct mlx5_encap_info *b) +{ + return memcmp(a, b, sizeof(*a)); +} + +static inline int hash_encap_info(struct mlx5_encap_info *info) +{ + return jhash(info, sizeof(*info), 0); +} + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + __be32 *saddr, + int *out_ttl) +{ + struct rtable *rt; + struct neighbour *n = NULL; + int ttl; + +#if IS_ENABLED(CONFIG_INET) + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + if (IS_ERR(rt)) { + pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr); + return -EOPNOTSUPP; + } +#else + return -EOPNOTSUPP; +#endif + + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) { + pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n", + __func__); + ip_rt_put(rt); + return -EOPNOTSUPP; + } + + ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + *saddr = fl4->saddr; + *out_ttl = ttl; + *out_dev = rt->dst.dev; + + return 0; +} + +static int gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], + unsigned char h_dest[ETH_ALEN], + int ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN; + struct ethhdr *eth = (struct ethhdr *)buf; + struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IP); + + ip->daddr = daddr; + ip->saddr = saddr; + + ip->ttl = ttl; + ip->protocol = IPPROTO_UDP; + ip->version = 0x4; + ip->ihl = 0x5; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); + + return encap_size; +} + +static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5_encap_entry *e, + struct net_device **out_dev) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct flowi4 fl4 = {}; + struct neighbour *n; + char *encap_header; + int encap_size; + __be32 saddr; + int ttl; + int err; + + encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl4.flowi4_proto = IPPROTO_UDP; + fl4.fl4_dport = e->tun_info.tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto out; + } + fl4.daddr = e->tun_info.daddr; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + &fl4, &n, &saddr, &ttl); + if (err) + goto out; + + e->n = n; + e->out_dev = *out_dev; + + if (!(n->nud_state & NUD_VALID)) { + err = -ENOTSUPP; + goto out; + } + + neigh_ha_snapshot(e->h_dest, n, *out_dev); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + e->h_dest, ttl, + e->tun_info.daddr, + saddr, e->tun_info.tp_dst, + e->tun_info.tun_id); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + encap_size, encap_header, &e->encap_id); +out: + kfree(encap_header); + return err; +} + +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned short family = ip_tunnel_info_af(tun_info); + struct ip_tunnel_key *key = &tun_info->key; + struct mlx5_encap_info info; + struct mlx5_encap_entry *e; + struct net_device *out_dev; + uintptr_t hash_key; + bool found = false; + int tunnel_type; + int err; + + /* udp dst port must be given */ + if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) + return -EOPNOTSUPP; + + if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { + info.tp_dst = key->tp_dst; + info.tun_id = tunnel_id_to_key32(key->tun_id); + tunnel_type = MLX5_HEADER_TYPE_VXLAN; + } else { + return -EOPNOTSUPP; + } + + switch (family) { + case AF_INET: + info.daddr = key->u.ipv4.dst; + break; + default: + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(&info); + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + if (!cmp_encap_info(&e->tun_info, &info)) { + found = true; + break; + } + } + + if (found) { + attr->encap = e; + return 0; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->tun_info = info; + e->tunnel_type = tunnel_type; + INIT_LIST_HEAD(&e->flows); + + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + if (err) + goto out_err; + + attr->encap = e; + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + + return err; + +out_err: + kfree(e); + return err; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - struct mlx5_esw_flow_attr *attr) + struct mlx5e_tc_flow *flow) { + struct mlx5_esw_flow_attr *attr = flow->attr; + struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); + bool encap = false; + int err; if (tc_no_actions(exts)) return -EINVAL; @@ -407,22 +800,44 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); - if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + if (switchdev_port_same_parent_id(priv->netdev, + out_dev)) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + out_priv = netdev_priv(out_dev); + attr->out_rep = out_priv->ppriv; + } else if (encap) { + err = mlx5e_attach_encap(priv, info, + out_dev, attr); + if (err) + return err; + list_add(&flow->encap, &attr->encap->flows); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + out_priv = netdev_priv(attr->encap->out_dev); + attr->out_rep = out_priv->ppriv; + } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); return -EINVAL; } + continue; + } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + if (is_tcf_tunnel_set(a)) { + info = tcf_tunnel_info(a); + if (info) + encap = true; + else + return -EOPNOTSUPP; continue; } @@ -439,6 +854,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_tunnel_release(a)) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + continue; + } + return -EINVAL; } return 0; @@ -453,7 +873,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; - struct mlx5_flow_rule *old = NULL; + struct mlx5_flow_handle *old = NULL; struct mlx5_esw_flow_attr *old_attr = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -487,7 +907,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (fdb_flow) { flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); - err = parse_tc_fdb_actions(priv, f->exts, flow->attr); + err = parse_tc_fdb_actions(priv, f->exts, flow); if (err < 0) goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); @@ -514,7 +934,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto out; err_del_rule: - mlx5_del_flow_rule(flow->rule); + mlx5_del_flow_rules(flow->rule); err_free: if (!old) @@ -524,6 +944,24 @@ out: return err; } +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) { + struct list_head *next = flow->encap.next; + + list_del(&flow->encap); + if (list_empty(next)) { + struct mlx5_encap_entry *e; + + e = list_entry(next, struct mlx5_encap_entry, flows); + if (e->n) { + mlx5_encap_dealloc(priv->mdev, e->encap_id); + neigh_release(e->n); + } + hlist_del_rcu(&e->encap_hlist); + kfree(e); + } +} + int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { @@ -539,6 +977,9 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow->rule, flow->attr); + if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); + kfree(flow); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index aaca09002ca6..e74a73be5e0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -469,7 +469,7 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev) int mlx5_start_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; - u32 async_event_mask = MLX5_ASYNC_EVENT_MASK; + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; if (MLX5_CAP_GEN(dev, pg)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index be1f7333ab7f..9734ac89826e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -56,7 +56,7 @@ struct esw_uc_addr { /* E-Switch MC FDB table hash node */ struct esw_mc_addr { /* SRIOV only */ struct l2addr_node node; - struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ u32 refcnt; }; @@ -65,7 +65,7 @@ struct vport_addr { struct l2addr_node node; u8 action; u32 vport; - struct mlx5_flow_rule *flow_rule; /* SRIOV only */ + struct mlx5_flow_handle *flow_rule; /* SRIOV only */ /* A flag indicating that mac was added due to mc promiscuous vport */ bool mc_promisc; }; @@ -237,13 +237,14 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) } /* E-Switch FDB */ -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : MLX5_MATCH_OUTER_HEADERS); - struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; void *mv_misc = NULL; @@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = - mlx5_add_flow_rule(esw->fdb_table.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + mlx5_add_flow_rules(esw->fdb_table.fdb, spec, + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", @@ -300,7 +301,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, return flow_rule; } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) { u8 mac_c[ETH_ALEN]; @@ -309,7 +310,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) { u8 mac_c[ETH_ALEN]; @@ -322,7 +323,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) { u8 mac_c[ETH_ALEN]; @@ -361,7 +362,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); @@ -515,7 +516,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) del_l2_table_entry(esw->dev, esw_uc->table_index); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule); + mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; l2addr_hash_del(esw_uc); @@ -562,7 +563,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, case MLX5_ACTION_DEL: if (!iter_vaddr) continue; - mlx5_del_flow_rule(iter_vaddr->flow_rule); + mlx5_del_flow_rules(iter_vaddr->flow_rule); l2addr_hash_del(iter_vaddr); break; } @@ -632,7 +633,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) esw_mc->uplink_rule); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule); + mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; /* If the multicast mac is added as a result of mc promiscuous vport, @@ -645,7 +646,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) update_allmulti_vports(esw, vaddr, esw_mc); if (esw_mc->uplink_rule) - mlx5_del_flow_rule(esw_mc->uplink_rule); + mlx5_del_flow_rules(esw_mc->uplink_rule); l2addr_hash_del(esw_mc); return 0; @@ -828,14 +829,14 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, UPLINK_VPORT); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { - mlx5_del_flow_rule(vport->allmulti_rule); + mlx5_del_flow_rules(vport->allmulti_rule); vport->allmulti_rule = NULL; if (--allmulti_addr->refcnt > 0) goto promisc; if (allmulti_addr->uplink_rule) - mlx5_del_flow_rule(allmulti_addr->uplink_rule); + mlx5_del_flow_rules(allmulti_addr->uplink_rule); allmulti_addr->uplink_rule = NULL; } @@ -847,7 +848,7 @@ promisc: vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, vport_num); } else if (vport->promisc_rule) { - mlx5_del_flow_rule(vport->promisc_rule); + mlx5_del_flow_rules(vport->promisc_rule); vport->promisc_rule = NULL; } } @@ -1018,10 +1019,10 @@ static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) - mlx5_del_flow_rule(vport->egress.allowed_vlan); + mlx5_del_flow_rules(vport->egress.allowed_vlan); if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) - mlx5_del_flow_rule(vport->egress.drop_rule); + mlx5_del_flow_rules(vport->egress.drop_rule); vport->egress.allowed_vlan = NULL; vport->egress.drop_rule = NULL; @@ -1179,10 +1180,10 @@ static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) - mlx5_del_flow_rule(vport->ingress.drop_rule); + mlx5_del_flow_rules(vport->ingress.drop_rule); if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) - mlx5_del_flow_rule(vport->ingress.allow_rule); + mlx5_del_flow_rules(vport->ingress.allow_rule); vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; @@ -1212,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; u8 *smac_v; @@ -1264,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = - mlx5_add_flow_rule(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL); + mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); esw_warn(esw->dev, @@ -1278,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->ingress.drop_rule = - mlx5_add_flow_rule(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL); + mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); esw_warn(esw->dev, @@ -1301,6 +1303,7 @@ out: static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; @@ -1338,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->egress.allowed_vlan = - mlx5_add_flow_rule(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL); + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); esw_warn(esw->dev, @@ -1353,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, /* Drop others rule (star rule) */ memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->egress.drop_rule = - mlx5_add_flow_rule(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL); + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); esw_warn(esw->dev, @@ -1369,6 +1372,147 @@ out: return err; } +/* Vport QoS management */ +static int esw_create_tsar(struct mlx5_eswitch *esw) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return 0; + + if (esw->qos.enabled) + return -EEXIST; + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + &tsar_ctx, + &esw->qos.root_tsar_id); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); + return err; + } + + esw->qos.enabled = true; + return 0; +} + +static void esw_destroy_tsar(struct mlx5_eswitch *esw) +{ + int err; + + if (!esw->qos.enabled) + return; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_id); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err); + + esw->qos.enabled = false; +} + +static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, + u32 initial_max_rate) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_core_dev *dev = esw->dev; + void *vport_elem; + int err = 0; + + if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) || + !MLX5_CAP_QOS(dev, esw_scheduling)) + return 0; + + if (vport->qos.enabled) + return -EEXIST; + + MLX5_SET(scheduling_context, &sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, &sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(scheduling_context, &sched_ctx, parent_element_id, + esw->qos.root_tsar_id); + MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, + initial_max_rate); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + &sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + return err; + } + + vport->qos.enabled = true; + return 0; +} + +static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + int err = 0; + + if (!vport->qos.enabled) + return; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + + vport->qos.enabled = false; +} + +static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, + u32 max_rate) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_core_dev *dev = esw->dev; + void *vport_elem; + u32 bitmask = 0; + int err = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + if (!vport->qos.enabled) + return -EIO; + + MLX5_SET(scheduling_context, &sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, &sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(scheduling_context, &sched_ctx, parent_element_id, + esw->qos.root_tsar_id); + MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, + max_rate); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + err = mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + &sched_ctx, + vport->qos.esw_tsar_ix, + bitmask); + if (err) { + esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport_num, err); + return err; + } + + return 0; +} + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; @@ -1404,6 +1548,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, esw_vport_egress_config(esw, vport); } } + static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { @@ -1417,6 +1562,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); + /* Attach vport to the eswitch rate limiter */ + if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate)) + esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); + /* Sync with current vport context */ vport->enabled_events = enable_events; vport->enabled = true; @@ -1455,7 +1604,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - + esw_vport_disable_qos(esw, vport_num); if (vport_num && esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, @@ -1501,6 +1650,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) goto abort; + err = esw_create_tsar(esw); + if (err) + esw_warn(esw->dev, "Failed to create eswitch TSAR"); + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); @@ -1535,7 +1688,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_disable_vport(esw, i); if (mc_promisc && mc_promisc->uplink_rule) - mlx5_del_flow_rule(mc_promisc->uplink_rule); + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_tsar(esw); if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); @@ -1627,6 +1782,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + hash_init(esw->offloads.encap_tbl); mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { @@ -1795,6 +1951,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; + ivi->max_tx_rate = evport->info.max_rate; mutex_unlock(&esw->state_lock); return 0; @@ -1888,6 +2045,27 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, return 0; } +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, + int vport, u32 max_rate) +{ + struct mlx5_vport *evport; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + err = esw_vport_qos_config(esw, vport, max_rate); + if (!err) + evport->info.max_rate = max_rate; + + mutex_unlock(&esw->state_lock); + return err; +} + int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2e2938e08cda..40482e841413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -97,16 +97,16 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; - struct mlx5_flow_rule *allow_rule; - struct mlx5_flow_rule *drop_rule; + struct mlx5_flow_handle *allow_rule; + struct mlx5_flow_handle *drop_rule; }; struct vport_egress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allowed_vlans_grp; struct mlx5_flow_group *drop_grp; - struct mlx5_flow_rule *allowed_vlan; - struct mlx5_flow_rule *drop_rule; + struct mlx5_flow_handle *allowed_vlan; + struct mlx5_flow_handle *drop_rule; }; struct mlx5_vport_info { @@ -115,6 +115,7 @@ struct mlx5_vport_info { u8 qos; u64 node_guid; int link_state; + u32 max_rate; bool spoofchk; bool trusted; }; @@ -124,8 +125,8 @@ struct mlx5_vport { int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; - struct mlx5_flow_rule *promisc_rule; - struct mlx5_flow_rule *allmulti_rule; + struct mlx5_flow_handle *promisc_rule; + struct mlx5_flow_handle *allmulti_rule; struct work_struct vport_change_handler; struct vport_ingress ingress; @@ -133,6 +134,11 @@ struct mlx5_vport { struct mlx5_vport_info info; + struct { + bool enabled; + u32 esw_tsar_ix; + } qos; + bool enabled; u16 enabled_events; }; @@ -156,7 +162,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; - struct mlx5_flow_rule *miss_rule; + struct mlx5_flow_handle *miss_rule; int vlan_push_pop_refcount; } offloads; }; @@ -169,7 +175,7 @@ enum { }; struct mlx5_esw_sq { - struct mlx5_flow_rule *send_to_vport_rule; + struct mlx5_flow_handle *send_to_vport_rule; struct list_head list; }; @@ -182,7 +188,7 @@ struct mlx5_eswitch_rep { u8 hw_id[ETH_ALEN]; void *priv_data; - struct mlx5_flow_rule *vport_rx_rule; + struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; u16 vlan; u32 vlan_refcount; @@ -193,6 +199,7 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + DECLARE_HASHTABLE(encap_tbl, 8); }; struct mlx5_eswitch { @@ -209,6 +216,12 @@ struct mlx5_eswitch { */ struct mutex state_lock; struct esw_mc_addr *mc_promisc; + + struct { + bool enabled; + u32 root_tsar_id; + } qos; + struct mlx5_esw_offload offloads; int mode; }; @@ -234,6 +247,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, + int vport, u32 max_rate); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -243,11 +258,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct mlx5_flow_spec; struct mlx5_esw_flow_attr; -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr); -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); enum { @@ -258,6 +273,24 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 +struct mlx5_encap_info { + __be32 daddr; + __be32 tun_id; + __be16 tp_dst; +}; + +struct mlx5_encap_entry { + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct neighbour *n; + struct mlx5_encap_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; @@ -265,6 +298,7 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; + struct mlx5_encap_entry *encap; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d239f5d0ea36..731f28625cc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -43,33 +43,36 @@ enum { FDB_SLOW_PATH }; -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { - struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {0}; struct mlx5_fc *counter = NULL; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; void *misc; - int action; + int i = 0; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); /* per flow vlan pop/push is emulated, don't set that into the firmware */ - action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = attr->out_rep->vport; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport_num = attr->out_rep->vport; + i++; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter = counter; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[i].counter = counter; + i++; } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -80,10 +83,14 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, - spec, action, 0, &dest); + if (attr->encap) + flow_act.encap_id = attr->encap->encap_id; + rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, &flow_act, dest, i); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); @@ -270,11 +277,12 @@ out: return err; } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -296,10 +304,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -316,7 +324,7 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, return; list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { - mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + mlx5_del_flow_rules(esw_sq->send_to_vport_rule); list_del(&esw_sq->list); kfree(esw_sq); } @@ -326,7 +334,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num) { - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_esw_sq *esw_sq; int err; int i; @@ -362,8 +370,9 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; int err = 0; @@ -376,10 +385,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); @@ -406,6 +415,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) u32 *flow_group_in; void *match_criteria; int table_size, ix, err = 0; + u32 flags = 0; flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) @@ -420,9 +430,14 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, ESW_OFFLOADS_NUM_ENTRIES, - ESW_OFFLOADS_NUM_GROUPS, 0); + ESW_OFFLOADS_NUM_GROUPS, 0, + flags); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); @@ -431,7 +446,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.fdb = fdb; table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -502,7 +517,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Table\n"); - mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -523,7 +538,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -ENOMEM; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); @@ -586,11 +601,12 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->offloads.vport_rx_group); } -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -611,9 +627,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = tirn; - flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 113c32326333..c4478ecd8056 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -37,6 +37,7 @@ #include "fs_core.h" #include "fs_cmd.h" #include "mlx5_core.h" +#include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) @@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id) + *next_ft, unsigned int *table_id, u32 flags) { + int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } + MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap); + MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap); + switch (op_mod) { case FS_FT_OP_MOD_NORMAL: if (next_ft) { @@ -243,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); @@ -453,27 +459,32 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -#define MAX_ENCAP_SIZE (128) - -int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id) +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id) { + int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) + - (MAX_ENCAP_SIZE / sizeof(u32))]; - void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, - encap_header); - void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in, - encap_header); - int inlen = header - (void *)in + size; + void *encap_header_in; + void *header; + int inlen; int err; + u32 *in; - if (size > MAX_ENCAP_SIZE) + if (size > MLX5_CAP_ESW(dev, max_encap_header_size)) return -EINVAL; + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size, + GFP_KERNEL); + if (!in) + return -ENOMEM; + + encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); + header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); + inlen = header - (void *)in + size; + memset(in, 0, inlen); MLX5_SET(alloc_encap_header_in, in, opcode, MLX5_CMD_OP_ALLOC_ENCAP_HEADER); @@ -485,10 +496,11 @@ int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + kfree(in); return err; } -void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id) +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) { u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index c5bc4686c832..8fad80688536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id); + *next_ft, unsigned int *table_id, u32 flags); int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); @@ -89,11 +89,4 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b, u16 id, u64 *packets, u64 *bytes); -int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 914e5466f729..68ec4ea25d50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -153,6 +153,11 @@ static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); static void del_fte(struct fs_node *node); +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2); +static struct mlx5_flow_rule * +find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, unsigned int refcount, @@ -369,6 +374,7 @@ static void del_rule(struct fs_node *node) struct mlx5_core_dev *dev = get_dev(node); int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; + bool update_fte = false; match_value = mlx5_vzalloc(match_len); if (!match_value) { @@ -387,13 +393,23 @@ static void del_rule(struct fs_node *node) list_del(&rule->next_ft); mutex_unlock(&rule->dest_attr.ft->lock); } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && + --fte->dests_size) { + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + update_fte = true; + goto out; + } + if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), - err = mlx5_cmd_update_fte(dev, ft, - fg->id, - modify_mask, - fte); + update_fte = true; + } +out: + if (update_fte && fte->dests_size) { + err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte); if (err) mlx5_core_warn(dev, "%s can't del rule fg id=%d fte_index=%d\n", @@ -444,8 +460,7 @@ static void del_flow_group(struct fs_node *node) fg->id, ft->id); } -static struct fs_fte *alloc_fte(u8 action, - u32 flow_tag, +static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, u32 *match_value, unsigned int index) { @@ -457,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; - fte->flow_tag = flow_tag; + fte->flow_tag = flow_act->flow_tag; fte->index = index; - fte->action = action; + fte->action = flow_act->action; + fte->encap_id = flow_act->encap_id; return fte; } @@ -489,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, enum fs_flow_table_type table_type, - enum fs_flow_table_op_mod op_mod) + enum fs_flow_table_op_mod op_mod, + u32 flags) { struct mlx5_flow_table *ft; @@ -503,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->type = table_type; ft->vport = vport; ft->max_fte = max_fte; + ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); @@ -641,8 +659,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest) +static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -667,6 +685,28 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, return err; } +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest) +{ + int i; + + if (!old_dest) { + if (handle->num_rules != 1) + return -EINVAL; + return _mlx5_modify_rule_destination(handle->rule[0], + new_dest); + } + + for (i = 0; i < handle->num_rules; i++) { + if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr)) + return _mlx5_modify_rule_destination(handle->rule[i], + new_dest); + } + + return -EINVAL; +} + /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ static int connect_fwd_rules(struct mlx5_core_dev *dev, struct mlx5_flow_table *new_next_ft, @@ -689,7 +729,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); mutex_unlock(&old_next_ft->lock); list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { - err = mlx5_modify_rule_destination(iter, &dest); + err = _mlx5_modify_rule_destination(iter, &dest); if (err) pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", new_next_ft->id); @@ -739,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, enum fs_flow_table_op_mod op_mod, u16 vport, int prio, - int max_fte, u32 level) + int max_fte, u32 level, + u32 flags) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -772,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa vport, max_fte ? roundup_pow_of_two(max_fte) : 0, root->table_type, - op_mod); + op_mod, flags); if (!ft) { err = -ENOMEM; goto unlock_root; @@ -782,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, - ft->level, log_table_sz, next_ft, &ft->id); + ft->level, log_table_sz, next_ft, &ft->id, + ft->flags); if (err) goto free_ft; @@ -807,10 +849,11 @@ unlock_root: struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, - u32 level) + u32 level, + u32 flags) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level); + max_fte, level, flags); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, @@ -818,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace u32 level, u16 vport) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level); + max_fte, level, 0); } struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( @@ -826,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( int prio, u32 level) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level); + level, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); @@ -834,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam int prio, int num_flow_table_entries, int max_num_groups, - u32 level) + u32 level, + u32 flags) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); if (IS_ERR(ft)) return ft; @@ -918,55 +962,133 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) return rule; } -/* fte should not be deleted while calling this function */ -static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, - struct mlx5_flow_group *fg, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle *alloc_handle(int num_rules) +{ + struct mlx5_flow_handle *handle; + + handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) * + num_rules, GFP_KERNEL); + if (!handle) + return NULL; + + handle->num_rules = num_rules; + + return handle; +} + +static void destroy_flow_handle(struct fs_fte *fte, + struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *dest, + int i) +{ + for (; --i >= 0;) { + if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) { + fte->dests_size--; + list_del(&handle->rule[i]->node.list); + kfree(handle->rule[i]); + } + } + kfree(handle); +} + +static struct mlx5_flow_handle * +create_flow_handle(struct fs_fte *fte, + struct mlx5_flow_destination *dest, + int dest_num, + int *modify_mask, + bool *new_rule) { + struct mlx5_flow_handle *handle; + struct mlx5_flow_rule *rule = NULL; + static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int type; + int i = 0; + + handle = alloc_handle((dest_num) ? dest_num : 1); + if (!handle) + return ERR_PTR(-ENOMEM); + + do { + if (dest) { + rule = find_flow_rule(fte, dest + i); + if (rule) { + atomic_inc(&rule->node.refcount); + goto rule_found; + } + } + + *new_rule = true; + rule = alloc_rule(dest + i); + if (!rule) + goto free_rules; + + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ + tree_init_node(&rule->node, 1, del_rule); + if (dest && + dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) { + fte->dests_size++; + + type = dest[i].type == + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + *modify_mask |= type ? count : dst; + } +rule_found: + handle->rule[i] = rule; + } while (++i < dest_num); + + return handle; + +free_rules: + destroy_flow_handle(fte, handle, dest, i); + return ERR_PTR(-ENOMEM); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_handle * +add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest, + int dest_num, + bool update_action) +{ + struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct mlx5_flow_rule *rule; int modify_mask = 0; int err; + bool new_rule = false; - rule = alloc_rule(dest); - if (!rule) - return ERR_PTR(-ENOMEM); + handle = create_flow_handle(fte, dest, dest_num, &modify_mask, + &new_rule); + if (IS_ERR(handle) || !new_rule) + goto out; - fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- we need flow tables to be in the - * end of the list for forward to next prio rules. - */ - tree_init_node(&rule->node, 1, del_rule); - if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - list_add(&rule->node.list, &fte->node.children); - else - list_add_tail(&rule->node.list, &fte->node.children); - if (dest) { - fte->dests_size++; + if (update_action) + modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); - modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ? - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) : - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); - } - - if (fte->dests_size == 1 || !dest) + fs_get_obj(ft, fg->node.parent); + if (!(fte->status & FS_FTE_STATUS_EXISTING)) err = mlx5_cmd_create_fte(get_dev(&ft->node), ft, fg->id, fte); else err = mlx5_cmd_update_fte(get_dev(&ft->node), ft, fg->id, modify_mask, fte); if (err) - goto free_rule; + goto free_handle; fte->status |= FS_FTE_STATUS_EXISTING; - return rule; +out: + return handle; -free_rule: - list_del(&rule->node.list); - kfree(rule); - if (dest) - fte->dests_size--; +free_handle: + destroy_flow_handle(fte, handle, dest, handle->num_rules); return ERR_PTR(err); } @@ -995,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, /* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - u8 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct list_head **prev) { struct fs_fte *fte; int index; index = get_free_fte_index(fg, prev); - fte = alloc_fte(action, flow_tag, match_value, index); + fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) return fte; @@ -1067,71 +1188,81 @@ out: return fg; } +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2) +{ + if (d1->type == d2->type) { + if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + d1->vport_num == d2->vport_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + d1->ft == d2->ft) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + d1->tir_num == d2->tir_num)) + return true; + } + + return false; +} + static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest) { struct mlx5_flow_rule *rule; list_for_each_entry(rule, &fte->node.children, node.list) { - if (rule->dest_attr.type == dest->type) { - if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && - dest->vport_num == rule->dest_attr.vport_num) || - (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && - dest->ft == rule->dest_attr.ft) || - (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && - dest->tir_num == rule->dest_attr.tir_num)) - return rule; - } + if (mlx5_flow_dests_cmp(&rule->dest_attr, dest)) + return rule; } return NULL; } -static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, - u8 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + u32 *match_value, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) { - struct fs_fte *fte; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; struct list_head *prev; + struct fs_fte *fte; + int i; nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); fs_for_each_fte(fte, fg) { nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && - action == fte->action && flow_tag == fte->flow_tag) { - rule = find_flow_rule(fte, dest); - if (rule) { - atomic_inc(&rule->node.refcount); - unlock_ref_node(&fte->node); - unlock_ref_node(&fg->node); - return rule; + (flow_act->action & fte->action) && + flow_act->flow_tag == fte->flow_tag) { + int old_action = fte->action; + + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action = old_action; + goto unlock_fte; + } else { + goto add_rules; } - rule = add_rule_fte(fte, fg, dest); - unlock_ref_node(&fte->node); - if (IS_ERR(rule)) - goto unlock_fg; - else - goto add_rule; } unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); if (fg->num_ftes >= fg->max_ftes) { - rule = ERR_PTR(-ENOSPC); + handle = ERR_PTR(-ENOSPC); goto unlock_fg; } - fte = create_fte(fg, match_value, action, flow_tag, &prev); + fte = create_fte(fg, match_value, flow_act, &prev); if (IS_ERR(fte)) { - rule = (void *)fte; + handle = (void *)fte; goto unlock_fg; } tree_init_node(&fte->node, 0, del_fte); - rule = add_rule_fte(fte, fg, dest); - if (IS_ERR(rule)) { + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + handle = add_rule_fte(fte, fg, dest, dest_num, false); + if (IS_ERR(handle)) { kfree(fte); goto unlock_fg; } @@ -1140,19 +1271,24 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, tree_add_node(&fte->node, &fg->node); list_add(&fte->node.list, prev); -add_rule: - tree_add_node(&rule->node, &fte->node); +add_rules: + for (i = 0; i < handle->num_rules; i++) { + if (atomic_read(&handle->rule[i]->node.refcount) == 1) + tree_add_node(&handle->rule[i]->node, &fte->node); + } +unlock_fte: + unlock_ref_node(&fte->node); unlock_fg: unlock_ref_node(&fg->node); - return rule; + return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule) +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) { struct mlx5_flow_rule *dst; struct fs_fte *fte; - fs_get_obj(fte, rule->node.parent); + fs_get_obj(fte, handle->rule[0]->node.parent); fs_for_each_dst(dst, fte) { if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) @@ -1170,8 +1306,9 @@ static bool counter_is_valid(struct mlx5_fc *counter, u32 action) if (!counter) return false; - /* Hardware support counter for a drop action only */ - return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT); + return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) && + (action & MLX5_FLOW_CONTEXT_ACTION_COUNT); } static bool dest_is_valid(struct mlx5_flow_destination *dest, @@ -1191,18 +1328,22 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } -static struct mlx5_flow_rule * -_mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle * +_mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) + { struct mlx5_flow_group *g; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; + int i; - if (!dest_is_valid(dest, action, ft)) - return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { + if (!dest_is_valid(&dest[i], flow_act->action, ft)) + return ERR_PTR(-EINVAL); + } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) @@ -1211,7 +1352,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, g->mask.match_criteria, spec->match_criteria)) { rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest); + flow_act, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } @@ -1223,8 +1364,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. @@ -1245,22 +1385,22 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); struct mlx5_flow_destination gen_dest; struct mlx5_flow_table *next_ft = NULL; - struct mlx5_flow_rule *rule = NULL; - u32 sw_action = action; + struct mlx5_flow_handle *handle = NULL; + u32 sw_action = flow_act->action; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); - if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) return ERR_PTR(-EOPNOTSUPP); if (dest) @@ -1271,34 +1411,40 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; gen_dest.ft = next_ft; dest = &gen_dest; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest_num = 1; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { mutex_unlock(&root->chain_lock); return ERR_PTR(-EOPNOTSUPP); } } - rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest); + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { - if (!IS_ERR_OR_NULL(rule) && - (list_empty(&rule->next_ft))) { + if (!IS_ERR_OR_NULL(handle) && + (list_empty(&handle->rule[0]->next_ft))) { mutex_lock(&next_ft->lock); - list_add(&rule->next_ft, &next_ft->fwd_rules); + list_add(&handle->rule[0]->next_ft, + &next_ft->fwd_rules); mutex_unlock(&next_ft->lock); - rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } mutex_unlock(&root->chain_lock); } - return rule; + return handle; } -EXPORT_SYMBOL(mlx5_add_flow_rule); +EXPORT_SYMBOL(mlx5_add_flow_rules); -void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) +void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) { - tree_remove_node(&rule->node); + int i; + + for (i = handle->num_rules - 1; i >= 0; i--) + tree_remove_node(&handle->rule[i]->node); + kfree(handle); } -EXPORT_SYMBOL(mlx5_del_flow_rule); +EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) @@ -1678,7 +1824,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 71ff03bceabb..8e668c63f69e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -94,6 +94,11 @@ struct mlx5_flow_rule { u32 sw_action; }; +struct mlx5_flow_handle { + int num_rules; + struct mlx5_flow_rule *rule[]; +}; + /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; @@ -112,6 +117,7 @@ struct mlx5_flow_table { struct mutex lock; /* FWD rules that point on this flow table */ struct list_head fwd_rules; + u32 flags; }; struct mlx5_fc_cache { @@ -145,6 +151,7 @@ struct fs_fte { u32 flow_tag; u32 index; u32 action; + u32 encap_id; enum fs_fte_status status; struct mlx5_fc *counter; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3eb931585b3e..c07493307a83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1424,6 +1424,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5, PCIe 4.0 VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 187662c8ea96..4762bb9d013c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -92,6 +92,13 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 *element_id); +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 element_id, + u32 modify_bitmask); +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); @@ -114,6 +121,12 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id); +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 104902a93a0b..e651e4c02867 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -36,6 +36,71 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +/* Scheduling element fw management */ +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 *element_id) +{ + u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + void *schedc; + int err; + + schedc = MLX5_ADDR_OF(create_scheduling_element_in, in, + scheduling_context); + MLX5_SET(create_scheduling_element_in, in, opcode, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT); + MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *element_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + return 0; +} + +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 element_id, + u32 modify_bitmask) +{ + u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + void *schedc; + + schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in, + scheduling_context); + MLX5_SET(modify_scheduling_element_in, in, opcode, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT); + MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(modify_scheduling_element_in, in, modify_bitmask, + modify_bitmask); + MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + + MLX5_SET(destroy_scheduling_element_in, in, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + /* Finds an entry where we can register the given rate * If the rate already exists, return the entry where it is registered, * otherwise return the first available entry. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 5989f7cb5462..bac2e5e826e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -29,9 +29,20 @@ config MLXSW_PCI To compile this driver as a module, choose M here: the module will be called mlxsw_pci. +config MLXSW_SWITCHIB + tristate "Mellanox Technologies SwitchIB and SwitchIB-2 support" + depends on MLXSW_CORE && NET_SWITCHDEV + default m + ---help--- + This driver supports Mellanox Technologies SwitchIB and SwitchIB-2 + Infiniband Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_switchib. + config MLXSW_SWITCHX2 tristate "Mellanox Technologies SwitchX-2 support" - depends on MLXSW_CORE && NET_SWITCHDEV + depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV default m ---help--- This driver supports Mellanox Technologies SwitchX-2 Ethernet @@ -42,7 +53,7 @@ config MLXSW_SWITCHX2 config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" - depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q + depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index d20ae1838a64..badef877015a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -3,6 +3,8 @@ mlxsw_core-objs := core.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o mlxsw_pci-objs := pci.o +obj-$(CONFIG_MLXSW_SWITCHIB) += mlxsw_switchib.o +mlxsw_switchib-objs := switchib.o obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 28271bedd957..56e19b0d2f8f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -513,6 +513,11 @@ static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core) * are no more sources in the table, will return resource id 0xFFF to indicate * it. */ + +#define MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID 0xffff +#define MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES 100 +#define MLXSW_CMD_QUERY_RESOURCES_PER_QUERY 32 + static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core, char *out_mbox, int index) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index aa33d58b9f81..600481771c04 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -67,6 +67,7 @@ #include "trap.h" #include "emad.h" #include "reg.h" +#include "resources.h" static LIST_HEAD(mlxsw_core_driver_list); static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); @@ -89,6 +90,23 @@ struct mlxsw_core_pcpu_stats { u32 port_rx_invalid; }; +struct mlxsw_core_port { + struct devlink_port devlink_port; + void *port_driver_priv; + u8 local_port; +}; + +void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) +{ + return mlxsw_core_port->port_driver_priv; +} +EXPORT_SYMBOL(mlxsw_core_port_driver_priv); + +static bool mlxsw_core_port_check(struct mlxsw_core_port *mlxsw_core_port) +{ + return mlxsw_core_port->port_driver_priv != NULL; +} + struct mlxsw_core { struct mlxsw_driver *driver; const struct mlxsw_bus *bus; @@ -111,8 +129,9 @@ struct mlxsw_core { struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ } lag; - struct mlxsw_resources resources; + struct mlxsw_res res; struct mlxsw_hwmon *hwmon; + struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS]; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -822,17 +841,6 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) spin_lock(&mlxsw_core_driver_list_lock); mlxsw_driver = __driver_find(kind); - if (!mlxsw_driver) { - spin_unlock(&mlxsw_core_driver_list_lock); - request_module(MLXSW_MODULE_ALIAS_PREFIX "%s", kind); - spin_lock(&mlxsw_core_driver_list_lock); - mlxsw_driver = __driver_find(kind); - } - if (mlxsw_driver) { - if (!try_module_get(mlxsw_driver->owner)) - mlxsw_driver = NULL; - } - spin_unlock(&mlxsw_core_driver_list_lock); return mlxsw_driver; } @@ -844,9 +852,6 @@ static void mlxsw_core_driver_put(const char *kind) spin_lock(&mlxsw_core_driver_list_lock); mlxsw_driver = __driver_find(kind); spin_unlock(&mlxsw_core_driver_list_lock); - if (!mlxsw_driver) - return; - module_put(mlxsw_driver->owner); } static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core) @@ -933,6 +938,21 @@ static void *__dl_port(struct devlink_port *devlink_port) return container_of(devlink_port, struct mlxsw_core_port, devlink_port); } +static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type port_type) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->port_type_set) + return -EOPNOTSUPP; + + return mlxsw_driver->port_type_set(mlxsw_core, + mlxsw_core_port->local_port, + port_type); +} + static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, u32 *p_threshold) @@ -941,7 +961,8 @@ static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_port_pool_get) + if (!mlxsw_driver->sb_port_pool_get || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index, pool_index, p_threshold); @@ -955,7 +976,8 @@ static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_port_pool_set) + if (!mlxsw_driver->sb_port_pool_set || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index, pool_index, threshold); @@ -971,7 +993,8 @@ mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_tc_pool_bind_get) + if (!mlxsw_driver->sb_tc_pool_bind_get || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index, tc_index, pool_type, @@ -988,7 +1011,8 @@ mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_tc_pool_bind_set) + if (!mlxsw_driver->sb_tc_pool_bind_set || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index, tc_index, pool_type, @@ -1026,7 +1050,8 @@ mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_occ_port_pool_get) + if (!mlxsw_driver->sb_occ_port_pool_get || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index, pool_index, p_cur, p_max); @@ -1042,7 +1067,8 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); - if (!mlxsw_driver->sb_occ_tc_port_bind_get) + if (!mlxsw_driver->sb_occ_tc_port_bind_get || + !mlxsw_core_port_check(mlxsw_core_port)) return -EOPNOTSUPP; return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port, sb_index, tc_index, @@ -1050,6 +1076,7 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, } static const struct devlink_ops mlxsw_devlink_ops = { + .port_type_set = mlxsw_devlink_port_type_set, .port_split = mlxsw_devlink_port_split, .port_unsplit = mlxsw_devlink_port_unsplit, .sb_pool_get = mlxsw_devlink_sb_pool_get, @@ -1101,14 +1128,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, - &mlxsw_core->resources); + &mlxsw_core->res); if (err) goto err_bus_init; - if (mlxsw_core->resources.max_lag_valid && - mlxsw_core->resources.max_ports_in_lag_valid) { - alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag * - mlxsw_core->resources.max_ports_in_lag; + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) && + MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) { + alloc_size = sizeof(u8) * + MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG) * + MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_core->lag.mapping) { err = -ENOMEM; @@ -1615,7 +1643,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive); static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 port_index) { - return mlxsw_core->resources.max_ports_in_lag * lag_id + + return MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS) * lag_id + port_index; } @@ -1644,7 +1672,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, { int i; - for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) { + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); i++) { int index = mlxsw_core_lag_mapping_index(mlxsw_core, lag_id, i); @@ -1654,34 +1682,97 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); -struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core) +bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id) { - return &mlxsw_core->resources; + return mlxsw_res_valid(&mlxsw_core->res, res_id); } -EXPORT_SYMBOL(mlxsw_core_resources_get); +EXPORT_SYMBOL(mlxsw_core_res_valid); -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, - struct mlxsw_core_port *mlxsw_core_port, u8 local_port, - struct net_device *dev, bool split, u32 split_group) +u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id) +{ + return mlxsw_res_get(&mlxsw_core->res, res_id); +} +EXPORT_SYMBOL(mlxsw_core_res_get); + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port) { struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + int err; - if (split) - devlink_port_split_set(devlink_port, split_group); - devlink_port_type_eth_set(devlink_port, dev); - return devlink_port_register(devlink, devlink_port, local_port); + mlxsw_core_port->local_port = local_port; + err = devlink_port_register(devlink, devlink_port, local_port); + if (err) + memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); + return err; } EXPORT_SYMBOL(mlxsw_core_port_init); -void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port) +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; devlink_port_unregister(devlink_port); + memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); } EXPORT_SYMBOL(mlxsw_core_port_fini); +void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv, struct net_device *dev, + bool split, u32 split_group) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + if (split) + devlink_port_split_set(devlink_port, split_group); + devlink_port_type_eth_set(devlink_port, dev); +} +EXPORT_SYMBOL(mlxsw_core_port_eth_set); + +void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + devlink_port_type_ib_set(devlink_port, NULL); +} +EXPORT_SYMBOL(mlxsw_core_port_ib_set); + +void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + devlink_port_type_clear(devlink_port); +} +EXPORT_SYMBOL(mlxsw_core_port_clear); + +enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, + u8 local_port) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + return devlink_port->type; +} +EXPORT_SYMBOL(mlxsw_core_port_type_get); + static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, const char *buf, size_t size) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c4f550b6f783..c0acc1b9eb2f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -48,17 +48,11 @@ #include "trap.h" #include "reg.h" - #include "cmd.h" - -#define MLXSW_MODULE_ALIAS_PREFIX "mlxsw-driver-" -#define MODULE_MLXSW_DRIVER_ALIAS(kind) \ - MODULE_ALIAS(MLXSW_MODULE_ALIAS_PREFIX kind) - -#define MLXSW_DEVICE_KIND_SWITCHX2 "switchx2" -#define MLXSW_DEVICE_KIND_SPECTRUM "spectrum" +#include "resources.h" struct mlxsw_core; +struct mlxsw_core_port; struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; @@ -148,23 +142,18 @@ u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 local_port); -struct mlxsw_core_port { - struct devlink_port devlink_port; -}; - -static inline void * -mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) -{ - /* mlxsw_core_port is ensured to always be the first field in driver - * port structure. - */ - return mlxsw_core_port; -} - -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, - struct mlxsw_core_port *mlxsw_core_port, u8 local_port, - struct net_device *dev, bool split, u32 split_group); -void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port); +void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port); +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port); +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port); +void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv, struct net_device *dev, + bool split, u32 split_group); +void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv); +void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port, + void *port_driver_priv); +enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, + u8 local_port); int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); @@ -221,11 +210,12 @@ struct mlxsw_config_profile { struct mlxsw_driver { struct list_head list; const char *kind; - struct module *owner; size_t priv_size; int (*init)(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info); void (*fini)(struct mlxsw_core *mlxsw_core); + int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port, + enum devlink_port_type new_type); int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count); int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port); @@ -266,45 +256,23 @@ struct mlxsw_driver { const struct mlxsw_config_profile *profile; }; -struct mlxsw_resources { - u32 max_span_valid:1, - max_lag_valid:1, - max_ports_in_lag_valid:1, - kvd_size_valid:1, - kvd_single_min_size_valid:1, - kvd_double_min_size_valid:1, - max_virtual_routers_valid:1, - max_system_ports_valid:1, - max_vlan_groups_valid:1, - max_regions_valid:1, - max_rif_valid:1; - u8 max_span; - u8 max_lag; - u8 max_ports_in_lag; - u32 kvd_size; - u32 kvd_single_min_size; - u32 kvd_double_min_size; - u16 max_virtual_routers; - u16 max_system_ports; - u16 max_vlan_groups; - u16 max_regions; - u16 max_rif; +bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id); - /* Internal resources. - * Determined by the SW, not queried from the HW. - */ - u32 kvd_single_size; - u32 kvd_double_size; - u32 kvd_linear_size; -}; +#define MLXSW_CORE_RES_VALID(res, short_res_id) \ + mlxsw_core_res_valid(res, MLXSW_RES_ID_##short_res_id) + +u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, + enum mlxsw_res_id res_id); -struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); +#define MLXSW_CORE_RES_GET(res, short_res_id) \ + mlxsw_core_res_get(res, MLXSW_RES_ID_##short_res_id) struct mlxsw_bus { const char *kind; int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_config_profile *profile, - struct mlxsw_resources *resources); + struct mlxsw_res *res); void (*fini)(void *bus_priv); bool (*skb_transmit_busy)(void *bus_priv, const struct mlxsw_tx_info *tx_info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 1ac8bf187168..ab710e37af99 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -262,7 +262,7 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { - char mtcap_pl[MLXSW_REG_MTCAP_LEN]; + char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 sensor_count; int i; @@ -295,7 +295,7 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) { - char mfcr_pl[MLXSW_REG_MFCR_LEN]; + char mfcr_pl[MLXSW_REG_MFCR_LEN] = {0}; enum mlxsw_reg_mfcr_pwm_frequency freq; unsigned int type_index; unsigned int num; diff --git a/drivers/net/ethernet/mellanox/mlxsw/ib.h b/drivers/net/ethernet/mellanox/mlxsw/ib.h new file mode 100644 index 000000000000..ce313aaa6336 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/ib.h @@ -0,0 +1,39 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/ib.h + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MLXSW_IB_H +#define _MLXSW_IB_H + +#define MLXSW_IB_DEFAULT_MTU 4096 + +#endif /* _MLXSW_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index a94dbda6590b..3c95e3ddd9c2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -55,7 +55,7 @@ struct mlxsw_item { }; static inline unsigned int -__mlxsw_item_offset(struct mlxsw_item *item, unsigned short index, +__mlxsw_item_offset(const struct mlxsw_item *item, unsigned short index, size_t typesize) { BUG_ON(index && !item->step); @@ -72,7 +72,8 @@ __mlxsw_item_offset(struct mlxsw_item *item, unsigned short index, typesize); } -static inline u16 __mlxsw_item_get16(char *buf, struct mlxsw_item *item, +static inline u16 __mlxsw_item_get16(const char *buf, + const struct mlxsw_item *item, unsigned short index) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u16)); @@ -87,7 +88,7 @@ static inline u16 __mlxsw_item_get16(char *buf, struct mlxsw_item *item, return tmp; } -static inline void __mlxsw_item_set16(char *buf, struct mlxsw_item *item, +static inline void __mlxsw_item_set16(char *buf, const struct mlxsw_item *item, unsigned short index, u16 val) { unsigned int offset = __mlxsw_item_offset(item, index, @@ -105,7 +106,8 @@ static inline void __mlxsw_item_set16(char *buf, struct mlxsw_item *item, b[offset] = cpu_to_be16(tmp); } -static inline u32 __mlxsw_item_get32(char *buf, struct mlxsw_item *item, +static inline u32 __mlxsw_item_get32(const char *buf, + const struct mlxsw_item *item, unsigned short index) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u32)); @@ -120,7 +122,7 @@ static inline u32 __mlxsw_item_get32(char *buf, struct mlxsw_item *item, return tmp; } -static inline void __mlxsw_item_set32(char *buf, struct mlxsw_item *item, +static inline void __mlxsw_item_set32(char *buf, const struct mlxsw_item *item, unsigned short index, u32 val) { unsigned int offset = __mlxsw_item_offset(item, index, @@ -138,7 +140,8 @@ static inline void __mlxsw_item_set32(char *buf, struct mlxsw_item *item, b[offset] = cpu_to_be32(tmp); } -static inline u64 __mlxsw_item_get64(char *buf, struct mlxsw_item *item, +static inline u64 __mlxsw_item_get64(const char *buf, + const struct mlxsw_item *item, unsigned short index) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); @@ -153,7 +156,7 @@ static inline u64 __mlxsw_item_get64(char *buf, struct mlxsw_item *item, return tmp; } -static inline void __mlxsw_item_set64(char *buf, struct mlxsw_item *item, +static inline void __mlxsw_item_set64(char *buf, const struct mlxsw_item *item, unsigned short index, u64 val) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); @@ -170,8 +173,8 @@ static inline void __mlxsw_item_set64(char *buf, struct mlxsw_item *item, b[offset] = cpu_to_be64(tmp); } -static inline void __mlxsw_item_memcpy_from(char *buf, char *dst, - struct mlxsw_item *item, +static inline void __mlxsw_item_memcpy_from(const char *buf, char *dst, + const struct mlxsw_item *item, unsigned short index) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); @@ -180,7 +183,7 @@ static inline void __mlxsw_item_memcpy_from(char *buf, char *dst, } static inline void __mlxsw_item_memcpy_to(char *buf, const char *src, - struct mlxsw_item *item, + const struct mlxsw_item *item, unsigned short index) { unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); @@ -189,7 +192,8 @@ static inline void __mlxsw_item_memcpy_to(char *buf, const char *src, } static inline u16 -__mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift) +__mlxsw_item_bit_array_offset(const struct mlxsw_item *item, + u16 index, u8 *shift) { u16 max_index, be_index; u16 offset; /* byte offset inside the array */ @@ -212,7 +216,8 @@ __mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift) return item->offset + offset; } -static inline u8 __mlxsw_item_bit_array_get(char *buf, struct mlxsw_item *item, +static inline u8 __mlxsw_item_bit_array_get(const char *buf, + const struct mlxsw_item *item, u16 index) { u8 shift, tmp; @@ -224,7 +229,8 @@ static inline u8 __mlxsw_item_bit_array_get(char *buf, struct mlxsw_item *item, return tmp; } -static inline void __mlxsw_item_bit_array_set(char *buf, struct mlxsw_item *item, +static inline void __mlxsw_item_bit_array_set(char *buf, + const struct mlxsw_item *item, u16 index, u8 val) { u8 shift, tmp; @@ -254,7 +260,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ @@ -275,7 +281,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline u16 \ -mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ @@ -295,7 +301,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ @@ -316,7 +322,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline u32 \ -mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ @@ -336,7 +342,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ @@ -357,7 +363,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline u64 \ -mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ @@ -377,7 +383,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline void \ -mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf, char *dst) \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, char *dst) \ { \ __mlxsw_item_memcpy_from(buf, dst, \ &__ITEM_NAME(_type, _cname, _iname), 0); \ @@ -399,7 +405,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline void \ -mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf, \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, \ unsigned short index, \ char *dst) \ { \ @@ -424,7 +430,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .name = #_type "_" #_cname "_" #_iname, \ }; \ static inline u8 \ -mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, u16 index) \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, u16 index) \ { \ return __mlxsw_item_bit_array_get(buf, \ &__ITEM_NAME(_type, _cname, _iname), \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 912f71f84209..d5cf1ea1c89a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -48,33 +48,17 @@ #include <linux/seq_file.h> #include <linux/string.h> +#include "pci_hw.h" #include "pci.h" #include "core.h" #include "cmd.h" #include "port.h" +#include "resources.h" static const char mlxsw_pci_driver_name[] = "mlxsw_pci"; -static const struct pci_device_id mlxsw_pci_id_table[] = { - {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0}, - {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, - {0, } -}; - static struct dentry *mlxsw_pci_dbg_root; -static const char *mlxsw_pci_device_kind_get(const struct pci_device_id *id) -{ - switch (id->device) { - case PCI_DEVICE_ID_MELLANOX_SWITCHX2: - return MLXSW_DEVICE_KIND_SWITCHX2; - case PCI_DEVICE_ID_MELLANOX_SPECTRUM: - return MLXSW_DEVICE_KIND_SPECTRUM; - default: - BUG(); - } -} - #define mlxsw_pci_write32(mlxsw_pci, reg, val) \ iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) #define mlxsw_pci_read32(mlxsw_pci, reg) \ @@ -238,8 +222,9 @@ static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit) return owner_bit != !!(q->consumer_counter & q->count); } -static char *mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q, - u32 (*get_elem_owner_func)(char *)) +static char * +mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q, + u32 (*get_elem_owner_func)(const char *)) { struct mlxsw_pci_queue_elem_info *elem_info; char *elem; @@ -1154,76 +1139,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); } -#define MLXSW_RESOURCES_TABLE_END_ID 0xffff -#define MLXSW_MAX_SPAN_ID 0x2420 -#define MLXSW_MAX_LAG_ID 0x2520 -#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 -#define MLXSW_KVD_SIZE_ID 0x1001 -#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 -#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 -#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 -#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 -#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 -#define MLXSW_MAX_REGIONS_ID 0x2901 -#define MLXSW_MAX_RIF_ID 0x2C02 -#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 -#define MLXSW_RESOURCES_PER_QUERY 32 - -static void mlxsw_pci_resources_query_parse(int id, u64 val, - struct mlxsw_resources *resources) -{ - switch (id) { - case MLXSW_MAX_SPAN_ID: - resources->max_span = val; - resources->max_span_valid = 1; - break; - case MLXSW_MAX_LAG_ID: - resources->max_lag = val; - resources->max_lag_valid = 1; - break; - case MLXSW_MAX_PORTS_IN_LAG_ID: - resources->max_ports_in_lag = val; - resources->max_ports_in_lag_valid = 1; - break; - case MLXSW_KVD_SIZE_ID: - resources->kvd_size = val; - resources->kvd_size_valid = 1; - break; - case MLXSW_KVD_SINGLE_MIN_SIZE_ID: - resources->kvd_single_min_size = val; - resources->kvd_single_min_size_valid = 1; - break; - case MLXSW_KVD_DOUBLE_MIN_SIZE_ID: - resources->kvd_double_min_size = val; - resources->kvd_double_min_size_valid = 1; - break; - case MLXSW_MAX_VIRTUAL_ROUTERS_ID: - resources->max_virtual_routers = val; - resources->max_virtual_routers_valid = 1; - break; - case MLXSW_MAX_SYSTEM_PORT_ID: - resources->max_system_ports = val; - resources->max_system_ports_valid = 1; - break; - case MLXSW_MAX_VLAN_GROUPS_ID: - resources->max_vlan_groups = val; - resources->max_vlan_groups_valid = 1; - break; - case MLXSW_MAX_REGIONS_ID: - resources->max_regions = val; - resources->max_regions_valid = 1; - break; - case MLXSW_MAX_RIF_ID: - resources->max_rif = val; - resources->max_rif_valid = 1; - break; - default: - break; - } -} - static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, - struct mlxsw_resources *resources, + struct mlxsw_res *res, u8 query_enabled) { int index, i; @@ -1237,19 +1154,20 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_zero(mbox); - for (index = 0; index < MLXSW_RESOURCES_QUERY_MAX_QUERIES; index++) { + for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; + index++) { err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index); if (err) return err; - for (i = 0; i < MLXSW_RESOURCES_PER_QUERY; i++) { + for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) { id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); - if (id == MLXSW_RESOURCES_TABLE_END_ID) + if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID) return 0; - mlxsw_pci_resources_query_parse(id, data, resources); + mlxsw_res_parse(res, id, data); } } @@ -1259,13 +1177,14 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, return -EIO; } -static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, - struct mlxsw_resources *resources) +static int +mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, + struct mlxsw_res *res) { - u32 singles_size, doubles_size, linear_size; + u32 single_size, double_size, linear_size; - if (!resources->kvd_single_min_size_valid || - !resources->kvd_double_min_size_valid || + if (!MLXSW_RES_VALID(res, KVD_SINGLE_MIN_SIZE) || + !MLXSW_RES_VALID(res, KVD_DOUBLE_MIN_SIZE) || !profile->used_kvd_split_data) return -EIO; @@ -1277,31 +1196,31 @@ static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *pr * Both sizes must be a multiplications of the * granularity from the profile. */ - doubles_size = (resources->kvd_size - linear_size); - doubles_size *= profile->kvd_hash_double_parts; - doubles_size /= (profile->kvd_hash_double_parts + - profile->kvd_hash_single_parts); - doubles_size /= profile->kvd_hash_granularity; - doubles_size *= profile->kvd_hash_granularity; - singles_size = resources->kvd_size - doubles_size - - linear_size; + double_size = MLXSW_RES_GET(res, KVD_SIZE) - linear_size; + double_size *= profile->kvd_hash_double_parts; + double_size /= profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts; + double_size /= profile->kvd_hash_granularity; + double_size *= profile->kvd_hash_granularity; + single_size = MLXSW_RES_GET(res, KVD_SIZE) - double_size - + linear_size; /* Check results are legal. */ - if (singles_size < resources->kvd_single_min_size || - doubles_size < resources->kvd_double_min_size || - resources->kvd_size < linear_size) + if (single_size < MLXSW_RES_GET(res, KVD_SINGLE_MIN_SIZE) || + double_size < MLXSW_RES_GET(res, KVD_DOUBLE_MIN_SIZE) || + MLXSW_RES_GET(res, KVD_SIZE) < linear_size) return -EIO; - resources->kvd_single_size = singles_size; - resources->kvd_double_size = doubles_size; - resources->kvd_linear_size = linear_size; + MLXSW_RES_SET(res, KVD_SINGLE_SIZE, single_size); + MLXSW_RES_SET(res, KVD_DOUBLE_SIZE, double_size); + MLXSW_RES_SET(res, KVD_LINEAR_SIZE, linear_size); return 0; } static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, const struct mlxsw_config_profile *profile, - struct mlxsw_resources *resources) + struct mlxsw_res *res) { int i; int err; @@ -1390,22 +1309,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } - if (resources->kvd_size_valid) { - err = mlxsw_pci_profile_get_kvd_sizes(profile, resources); + if (MLXSW_RES_VALID(res, KVD_SIZE)) { + err = mlxsw_pci_profile_get_kvd_sizes(profile, res); if (err) return err; mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, - resources->kvd_linear_size); + MLXSW_RES_GET(res, KVD_LINEAR_SIZE)); mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, 1); mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, - resources->kvd_single_size); + MLXSW_RES_GET(res, KVD_SINGLE_SIZE)); mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( mbox, 1); mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, - resources->kvd_double_size); + MLXSW_RES_GET(res, KVD_DOUBLE_SIZE)); } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) @@ -1543,7 +1462,7 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_config_profile *profile, - struct mlxsw_resources *resources) + struct mlxsw_res *res) { struct mlxsw_pci *mlxsw_pci = bus_priv; struct pci_dev *pdev = mlxsw_pci->pdev; @@ -1602,12 +1521,12 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_boardinfo; - err = mlxsw_pci_resources_query(mlxsw_pci, mbox, resources, + err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res, profile->resource_query_enable); if (err) goto err_query_resources; - err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources); + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res); if (err) goto err_config_profile; @@ -1617,7 +1536,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, err = request_irq(mlxsw_pci->msix_entry.vector, mlxsw_pci_eq_irq_handler, 0, - mlxsw_pci_driver_name, mlxsw_pci); + mlxsw_pci->bus_info.device_kind, mlxsw_pci); if (err) { dev_err(&pdev->dev, "IRQ request failed\n"); goto err_request_eq_irq; @@ -1863,6 +1782,7 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + const char *driver_name = pdev->driver->name; struct mlxsw_pci *mlxsw_pci; int err; @@ -1876,7 +1796,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_pci_enable_device; } - err = pci_request_regions(pdev, mlxsw_pci_driver_name); + err = pci_request_regions(pdev, driver_name); if (err) { dev_err(&pdev->dev, "pci_request_regions failed\n"); goto err_pci_request_regions; @@ -1927,7 +1847,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_msix_init; } - mlxsw_pci->bus_info.device_kind = mlxsw_pci_device_kind_get(id); + mlxsw_pci->bus_info.device_kind = driver_name; mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; @@ -1979,33 +1899,30 @@ static void mlxsw_pci_remove(struct pci_dev *pdev) kfree(mlxsw_pci); } -static struct pci_driver mlxsw_pci_driver = { - .name = mlxsw_pci_driver_name, - .id_table = mlxsw_pci_id_table, - .probe = mlxsw_pci_probe, - .remove = mlxsw_pci_remove, -}; +int mlxsw_pci_driver_register(struct pci_driver *pci_driver) +{ + pci_driver->probe = mlxsw_pci_probe; + pci_driver->remove = mlxsw_pci_remove; + return pci_register_driver(pci_driver); +} +EXPORT_SYMBOL(mlxsw_pci_driver_register); -static int __init mlxsw_pci_module_init(void) +void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver) { - int err; + pci_unregister_driver(pci_driver); +} +EXPORT_SYMBOL(mlxsw_pci_driver_unregister); +static int __init mlxsw_pci_module_init(void) +{ mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL); if (!mlxsw_pci_dbg_root) return -ENOMEM; - err = pci_register_driver(&mlxsw_pci_driver); - if (err) - goto err_register_driver; return 0; - -err_register_driver: - debugfs_remove_recursive(mlxsw_pci_dbg_root); - return err; } static void __exit mlxsw_pci_module_exit(void) { - pci_unregister_driver(&mlxsw_pci_driver); debugfs_remove_recursive(mlxsw_pci_dbg_root); } @@ -2015,4 +1932,3 @@ module_exit(mlxsw_pci_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox switch PCI interface driver"); -MODULE_DEVICE_TABLE(pci, mlxsw_pci_id_table); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index d942a3e6fa41..d65582325cd5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/pci.h - * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,197 +35,31 @@ #ifndef _MLXSW_PCI_H #define _MLXSW_PCI_H -#include <linux/bitops.h> +#include <linux/pci.h> -#include "item.h" +#define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738 +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 +#define PCI_DEVICE_ID_MELLANOX_SWITCHIB 0xcb20 +#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2 0xcf08 -#define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738 -#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 -#define MLXSW_PCI_BAR0_SIZE (1024 * 1024) /* 1MB */ -#define MLXSW_PCI_PAGE_SIZE 4096 +#if IS_ENABLED(CONFIG_MLXSW_PCI) -#define MLXSW_PCI_CIR_BASE 0x71000 -#define MLXSW_PCI_CIR_IN_PARAM_HI MLXSW_PCI_CIR_BASE -#define MLXSW_PCI_CIR_IN_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x04) -#define MLXSW_PCI_CIR_IN_MODIFIER (MLXSW_PCI_CIR_BASE + 0x08) -#define MLXSW_PCI_CIR_OUT_PARAM_HI (MLXSW_PCI_CIR_BASE + 0x0C) -#define MLXSW_PCI_CIR_OUT_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x10) -#define MLXSW_PCI_CIR_TOKEN (MLXSW_PCI_CIR_BASE + 0x14) -#define MLXSW_PCI_CIR_CTRL (MLXSW_PCI_CIR_BASE + 0x18) -#define MLXSW_PCI_CIR_CTRL_GO_BIT BIT(23) -#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT BIT(22) -#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT 12 -#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 -#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 +int mlxsw_pci_driver_register(struct pci_driver *pci_driver); +void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver); -#define MLXSW_PCI_SW_RESET 0xF0010 -#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 -#define MLXSW_PCI_FW_READY 0xA1844 -#define MLXSW_PCI_FW_READY_MASK 0xFF -#define MLXSW_PCI_FW_READY_MAGIC 0x5E +#else -#define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 -#define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200 -#define MLXSW_PCI_DOORBELL_CQ_OFFSET 0x400 -#define MLXSW_PCI_DOORBELL_EQ_OFFSET 0x600 -#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET 0x800 -#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET 0xA00 +static inline int +mlxsw_pci_driver_register(struct pci_driver *pci_driver) +{ + return 0; +} -#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ - ((offset) + (type_offset) + (num) * 4) +static inline void +mlxsw_pci_driver_unregister(struct pci_driver *pci_driver) +{ +} -#define MLXSW_PCI_CQS_MAX 96 -#define MLXSW_PCI_EQS_COUNT 2 -#define MLXSW_PCI_EQ_ASYNC_NUM 0 -#define MLXSW_PCI_EQ_COMP_NUM 1 - -#define MLXSW_PCI_AQ_PAGES 8 -#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) -#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ -#define MLXSW_PCI_CQE_SIZE 16 /* 16 bytes per element */ -#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ -#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) -#define MLXSW_PCI_CQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE) -#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE) -#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80 - -#define MLXSW_PCI_WQE_SG_ENTRIES 3 -#define MLXSW_PCI_WQE_TYPE_ETHERNET 0xA - -/* pci_wqe_c - * If set it indicates that a completion should be reported upon - * execution of this descriptor. - */ -MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1); - -/* pci_wqe_lp - * Local Processing, set if packet should be processed by the local - * switch hardware: - * For Ethernet EMAD (Direct Route and non Direct Route) - - * must be set if packet destination is local device - * For InfiniBand CTL - must be set if packet destination is local device - * Otherwise it must be clear - * Local Process packets must not exceed the size of 2K (including payload - * and headers). - */ -MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); - -/* pci_wqe_type - * Packet type. - */ -MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); - -/* pci_wqe_byte_count - * Size of i-th scatter/gather entry, 0 if entry is unused. - */ -MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false); - -/* pci_wqe_address - * Physical address of i-th scatter/gather entry. - * Gather Entries must be 2Byte aligned. - */ -MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); - -/* pci_cqe_lag - * Packet arrives from a port which is a LAG - */ -MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); - -/* pci_cqe_system_port/lag_id - * When lag=0: System port on which the packet was received - * When lag=1: - * bits [15:4] LAG ID on which the packet was received - * bits [3:0] sub_port on which the packet was received - */ -MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); -MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); -MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); - -/* pci_cqe_wqe_counter - * WQE count of the WQEs completed on the associated dqn - */ -MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); - -/* pci_cqe_byte_count - * Byte count of received packets including additional two - * Reserved Bytes that are append to the end of the frame. - * Reserved for Send CQE. - */ -MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); - -/* pci_cqe_trap_id - * Trap ID that captured the packet. - */ -MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8); - -/* pci_cqe_crc - * Length include CRC. Indicates the length field includes - * the packet's CRC. - */ -MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1); - -/* pci_cqe_e - * CQE with Error. - */ -MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1); - -/* pci_cqe_sr - * 1 - Send Queue - * 0 - Receive Queue - */ -MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1); - -/* pci_cqe_dqn - * Descriptor Queue (DQ) Number. - */ -MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5); - -/* pci_cqe_owner - * Ownership bit. - */ -MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1); - -/* pci_eqe_event_type - * Event type. - */ -MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8); -#define MLXSW_PCI_EQE_EVENT_TYPE_COMP 0x00 -#define MLXSW_PCI_EQE_EVENT_TYPE_CMD 0x0A - -/* pci_eqe_event_sub_type - * Event type. - */ -MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8); - -/* pci_eqe_cqn - * Completion Queue that triggeret this EQE. - */ -MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7); - -/* pci_eqe_owner - * Ownership bit. - */ -MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); - -/* pci_eqe_cmd_token - * Command completion event - token - */ -MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); - -/* pci_eqe_cmd_status - * Command completion event - status - */ -MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); - -/* pci_eqe_cmd_out_param_h - * Command completion event - output parameter - higher part - */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); - -/* pci_eqe_cmd_out_param_l - * Command completion event - output parameter - lower part - */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); +#endif #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h new file mode 100644 index 000000000000..d147ddd97997 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -0,0 +1,229 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/pci_hw.h + * Copyright (c) 2015-2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_PCI_HW_H +#define _MLXSW_PCI_HW_H + +#include <linux/bitops.h> + +#include "item.h" + +#define MLXSW_PCI_BAR0_SIZE (1024 * 1024) /* 1MB */ +#define MLXSW_PCI_PAGE_SIZE 4096 + +#define MLXSW_PCI_CIR_BASE 0x71000 +#define MLXSW_PCI_CIR_IN_PARAM_HI MLXSW_PCI_CIR_BASE +#define MLXSW_PCI_CIR_IN_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x04) +#define MLXSW_PCI_CIR_IN_MODIFIER (MLXSW_PCI_CIR_BASE + 0x08) +#define MLXSW_PCI_CIR_OUT_PARAM_HI (MLXSW_PCI_CIR_BASE + 0x0C) +#define MLXSW_PCI_CIR_OUT_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x10) +#define MLXSW_PCI_CIR_TOKEN (MLXSW_PCI_CIR_BASE + 0x14) +#define MLXSW_PCI_CIR_CTRL (MLXSW_PCI_CIR_BASE + 0x18) +#define MLXSW_PCI_CIR_CTRL_GO_BIT BIT(23) +#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT BIT(22) +#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT 12 +#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 +#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 + +#define MLXSW_PCI_SW_RESET 0xF0010 +#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_FW_READY 0xA1844 +#define MLXSW_PCI_FW_READY_MASK 0xFFFF +#define MLXSW_PCI_FW_READY_MAGIC 0x5E + +#define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 +#define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200 +#define MLXSW_PCI_DOORBELL_CQ_OFFSET 0x400 +#define MLXSW_PCI_DOORBELL_EQ_OFFSET 0x600 +#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET 0x800 +#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET 0xA00 + +#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ + ((offset) + (type_offset) + (num) * 4) + +#define MLXSW_PCI_CQS_MAX 96 +#define MLXSW_PCI_EQS_COUNT 2 +#define MLXSW_PCI_EQ_ASYNC_NUM 0 +#define MLXSW_PCI_EQ_COMP_NUM 1 + +#define MLXSW_PCI_AQ_PAGES 8 +#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) +#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) +#define MLXSW_PCI_CQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE) +#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE) +#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80 + +#define MLXSW_PCI_WQE_SG_ENTRIES 3 +#define MLXSW_PCI_WQE_TYPE_ETHERNET 0xA + +/* pci_wqe_c + * If set it indicates that a completion should be reported upon + * execution of this descriptor. + */ +MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1); + +/* pci_wqe_lp + * Local Processing, set if packet should be processed by the local + * switch hardware: + * For Ethernet EMAD (Direct Route and non Direct Route) - + * must be set if packet destination is local device + * For InfiniBand CTL - must be set if packet destination is local device + * Otherwise it must be clear + * Local Process packets must not exceed the size of 2K (including payload + * and headers). + */ +MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); + +/* pci_wqe_type + * Packet type. + */ +MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); + +/* pci_wqe_byte_count + * Size of i-th scatter/gather entry, 0 if entry is unused. + */ +MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false); + +/* pci_wqe_address + * Physical address of i-th scatter/gather entry. + * Gather Entries must be 2Byte aligned. + */ +MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); + +/* pci_cqe_lag + * Packet arrives from a port which is a LAG + */ +MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); + +/* pci_cqe_system_port/lag_id + * When lag=0: System port on which the packet was received + * When lag=1: + * bits [15:4] LAG ID on which the packet was received + * bits [3:0] sub_port on which the packet was received + */ +MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); + +/* pci_cqe_wqe_counter + * WQE count of the WQEs completed on the associated dqn + */ +MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); + +/* pci_cqe_byte_count + * Byte count of received packets including additional two + * Reserved Bytes that are append to the end of the frame. + * Reserved for Send CQE. + */ +MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); + +/* pci_cqe_trap_id + * Trap ID that captured the packet. + */ +MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8); + +/* pci_cqe_crc + * Length include CRC. Indicates the length field includes + * the packet's CRC. + */ +MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1); + +/* pci_cqe_e + * CQE with Error. + */ +MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1); + +/* pci_cqe_sr + * 1 - Send Queue + * 0 - Receive Queue + */ +MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1); + +/* pci_cqe_dqn + * Descriptor Queue (DQ) Number. + */ +MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5); + +/* pci_cqe_owner + * Ownership bit. + */ +MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1); + +/* pci_eqe_event_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8); +#define MLXSW_PCI_EQE_EVENT_TYPE_COMP 0x00 +#define MLXSW_PCI_EQE_EVENT_TYPE_CMD 0x0A + +/* pci_eqe_event_sub_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8); + +/* pci_eqe_cqn + * Completion Queue that triggeret this EQE. + */ +MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7); + +/* pci_eqe_owner + * Ownership bit. + */ +MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); + +/* pci_eqe_cmd_token + * Command completion event - token + */ +MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); + +/* pci_eqe_cmd_status + * Command completion event - status + */ +MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); + +/* pci_eqe_cmd_out_param_h + * Command completion event - output parameter - higher part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); + +/* pci_eqe_cmd_out_param_l + * Command completion event - output parameter - lower part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index af371a82c35b..3d42146473b3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -44,6 +44,7 @@ #define MLXSW_PORT_SWID_DISABLED_PORT 255 #define MLXSW_PORT_SWID_ALL_SWIDS 254 +#define MLXSW_PORT_SWID_TYPE_IB 1 #define MLXSW_PORT_SWID_TYPE_ETH 2 #define MLXSW_PORT_MID 0xd000 @@ -51,6 +52,9 @@ #define MLXSW_PORT_MAX_PHY_PORTS 0x40 #define MLXSW_PORT_MAX_PORTS (MLXSW_PORT_MAX_PHY_PORTS + 1) +#define MLXSW_PORT_MAX_IB_PHY_PORTS 36 +#define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1) + #define MLXSW_PORT_DEVID_BITS_OFFSET 10 #define MLXSW_PORT_PHY_BITS_OFFSET 4 #define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6460c7256f2b..edad7cb62475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -48,8 +48,16 @@ struct mlxsw_reg_info { u16 id; u16 len; /* In u8 */ + const char *name; }; +#define MLXSW_REG_DEFINE(_name, _id, _len) \ +static const struct mlxsw_reg_info mlxsw_reg_##_name = { \ + .id = _id, \ + .len = _len, \ + .name = #_name, \ +} + #define MLXSW_REG(type) (&mlxsw_reg_##type) #define MLXSW_REG_LEN(type) MLXSW_REG(type)->len #define MLXSW_REG_ZERO(type, payload) memset(payload, 0, MLXSW_REG(type)->len) @@ -61,10 +69,7 @@ struct mlxsw_reg_info { #define MLXSW_REG_SGCR_ID 0x2000 #define MLXSW_REG_SGCR_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_sgcr = { - .id = MLXSW_REG_SGCR_ID, - .len = MLXSW_REG_SGCR_LEN, -}; +MLXSW_REG_DEFINE(sgcr, MLXSW_REG_SGCR_ID, MLXSW_REG_SGCR_LEN); /* reg_sgcr_llb * Link Local Broadcast (Default=0) @@ -87,10 +92,7 @@ static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb) #define MLXSW_REG_SPAD_ID 0x2002 #define MLXSW_REG_SPAD_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_spad = { - .id = MLXSW_REG_SPAD_ID, - .len = MLXSW_REG_SPAD_LEN, -}; +MLXSW_REG_DEFINE(spad, MLXSW_REG_SPAD_ID, MLXSW_REG_SPAD_LEN); /* reg_spad_base_mac * Base MAC address for the switch partitions. @@ -109,10 +111,7 @@ MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6); #define MLXSW_REG_SMID_ID 0x2007 #define MLXSW_REG_SMID_LEN 0x240 -static const struct mlxsw_reg_info mlxsw_reg_smid = { - .id = MLXSW_REG_SMID_ID, - .len = MLXSW_REG_SMID_LEN, -}; +MLXSW_REG_DEFINE(smid, MLXSW_REG_SMID_ID, MLXSW_REG_SMID_LEN); /* reg_smid_swid * Switch partition ID. @@ -156,10 +155,7 @@ static inline void mlxsw_reg_smid_pack(char *payload, u16 mid, #define MLXSW_REG_SSPR_ID 0x2008 #define MLXSW_REG_SSPR_LEN 0x8 -static const struct mlxsw_reg_info mlxsw_reg_sspr = { - .id = MLXSW_REG_SSPR_ID, - .len = MLXSW_REG_SSPR_LEN, -}; +MLXSW_REG_DEFINE(sspr, MLXSW_REG_SSPR_ID, MLXSW_REG_SSPR_LEN); /* reg_sspr_m * Master - if set, then the record describes the master system port. @@ -215,10 +211,7 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u8 local_port) #define MLXSW_REG_SFDAT_ID 0x2009 #define MLXSW_REG_SFDAT_LEN 0x8 -static const struct mlxsw_reg_info mlxsw_reg_sfdat = { - .id = MLXSW_REG_SFDAT_ID, - .len = MLXSW_REG_SFDAT_LEN, -}; +MLXSW_REG_DEFINE(sfdat, MLXSW_REG_SFDAT_ID, MLXSW_REG_SFDAT_LEN); /* reg_sfdat_swid * Switch partition ID. @@ -256,10 +249,7 @@ static inline void mlxsw_reg_sfdat_pack(char *payload, u32 age_time) #define MLXSW_REG_SFD_LEN (MLXSW_REG_SFD_BASE_LEN + \ MLXSW_REG_SFD_REC_LEN * MLXSW_REG_SFD_REC_MAX_COUNT) -static const struct mlxsw_reg_info mlxsw_reg_sfd = { - .id = MLXSW_REG_SFD_ID, - .len = MLXSW_REG_SFD_LEN, -}; +MLXSW_REG_DEFINE(sfd, MLXSW_REG_SFD_ID, MLXSW_REG_SFD_LEN); /* reg_sfd_swid * Switch partition ID for queries. Reserved on Write. @@ -580,10 +570,7 @@ mlxsw_reg_sfd_mc_pack(char *payload, int rec_index, #define MLXSW_REG_SFN_LEN (MLXSW_REG_SFN_BASE_LEN + \ MLXSW_REG_SFN_REC_LEN * MLXSW_REG_SFN_REC_MAX_COUNT) -static const struct mlxsw_reg_info mlxsw_reg_sfn = { - .id = MLXSW_REG_SFN_ID, - .len = MLXSW_REG_SFN_LEN, -}; +MLXSW_REG_DEFINE(sfn, MLXSW_REG_SFN_ID, MLXSW_REG_SFN_LEN); /* reg_sfn_swid * Switch partition ID. @@ -701,10 +688,7 @@ static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, #define MLXSW_REG_SPMS_ID 0x200D #define MLXSW_REG_SPMS_LEN 0x404 -static const struct mlxsw_reg_info mlxsw_reg_spms = { - .id = MLXSW_REG_SPMS_ID, - .len = MLXSW_REG_SPMS_LEN, -}; +MLXSW_REG_DEFINE(spms, MLXSW_REG_SPMS_ID, MLXSW_REG_SPMS_LEN); /* reg_spms_local_port * Local port number. @@ -748,10 +732,7 @@ static inline void mlxsw_reg_spms_vid_pack(char *payload, u16 vid, #define MLXSW_REG_SPVID_ID 0x200E #define MLXSW_REG_SPVID_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_spvid = { - .id = MLXSW_REG_SPVID_ID, - .len = MLXSW_REG_SPVID_LEN, -}; +MLXSW_REG_DEFINE(spvid, MLXSW_REG_SPVID_ID, MLXSW_REG_SPVID_LEN); /* reg_spvid_local_port * Local port number. @@ -792,10 +773,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) -static const struct mlxsw_reg_info mlxsw_reg_spvm = { - .id = MLXSW_REG_SPVM_ID, - .len = MLXSW_REG_SPVM_LEN, -}; +MLXSW_REG_DEFINE(spvm, MLXSW_REG_SPVM_ID, MLXSW_REG_SPVM_LEN); /* reg_spvm_pt * Priority tagged. If this bit is set, packets forwarded to the port with @@ -891,10 +869,7 @@ static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port, #define MLXSW_REG_SPAFT_ID 0x2010 #define MLXSW_REG_SPAFT_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_spaft = { - .id = MLXSW_REG_SPAFT_ID, - .len = MLXSW_REG_SPAFT_LEN, -}; +MLXSW_REG_DEFINE(spaft, MLXSW_REG_SPAFT_ID, MLXSW_REG_SPAFT_LEN); /* reg_spaft_local_port * Local port number. @@ -947,10 +922,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port, #define MLXSW_REG_SFGC_ID 0x2011 #define MLXSW_REG_SFGC_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_sfgc = { - .id = MLXSW_REG_SFGC_ID, - .len = MLXSW_REG_SFGC_LEN, -}; +MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN); enum mlxsw_reg_sfgc_type { MLXSW_REG_SFGC_TYPE_BROADCAST, @@ -1045,10 +1017,7 @@ mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type, #define MLXSW_REG_SFTR_ID 0x2012 #define MLXSW_REG_SFTR_LEN 0x420 -static const struct mlxsw_reg_info mlxsw_reg_sftr = { - .id = MLXSW_REG_SFTR_ID, - .len = MLXSW_REG_SFTR_LEN, -}; +MLXSW_REG_DEFINE(sftr, MLXSW_REG_SFTR_ID, MLXSW_REG_SFTR_LEN); /* reg_sftr_swid * Switch partition ID with which to associate the port. @@ -1118,10 +1087,7 @@ static inline void mlxsw_reg_sftr_pack(char *payload, #define MLXSW_REG_SFDF_ID 0x2013 #define MLXSW_REG_SFDF_LEN 0x14 -static const struct mlxsw_reg_info mlxsw_reg_sfdf = { - .id = MLXSW_REG_SFDF_ID, - .len = MLXSW_REG_SFDF_LEN, -}; +MLXSW_REG_DEFINE(sfdf, MLXSW_REG_SFDF_ID, MLXSW_REG_SFDF_LEN); /* reg_sfdf_swid * Switch partition ID. @@ -1205,10 +1171,7 @@ MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10); #define MLXSW_REG_SLDR_ID 0x2014 #define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ -static const struct mlxsw_reg_info mlxsw_reg_sldr = { - .id = MLXSW_REG_SLDR_ID, - .len = MLXSW_REG_SLDR_LEN, -}; +MLXSW_REG_DEFINE(sldr, MLXSW_REG_SLDR_ID, MLXSW_REG_SLDR_LEN); enum mlxsw_reg_sldr_op { /* Indicates a creation of a new LAG-ID, lag_id must be valid */ @@ -1288,10 +1251,7 @@ static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, #define MLXSW_REG_SLCR_ID 0x2015 #define MLXSW_REG_SLCR_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_slcr = { - .id = MLXSW_REG_SLCR_ID, - .len = MLXSW_REG_SLCR_LEN, -}; +MLXSW_REG_DEFINE(slcr, MLXSW_REG_SLCR_ID, MLXSW_REG_SLCR_LEN); enum mlxsw_reg_slcr_pp { /* Global Configuration (for all ports) */ @@ -1404,10 +1364,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) #define MLXSW_REG_SLCOR_ID 0x2016 #define MLXSW_REG_SLCOR_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_slcor = { - .id = MLXSW_REG_SLCOR_ID, - .len = MLXSW_REG_SLCOR_LEN, -}; +MLXSW_REG_DEFINE(slcor, MLXSW_REG_SLCOR_ID, MLXSW_REG_SLCOR_LEN); enum mlxsw_reg_slcor_col { /* Port is added with collector disabled */ @@ -1490,10 +1447,7 @@ static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, #define MLXSW_REG_SPMLR_ID 0x2018 #define MLXSW_REG_SPMLR_LEN 0x8 -static const struct mlxsw_reg_info mlxsw_reg_spmlr = { - .id = MLXSW_REG_SPMLR_ID, - .len = MLXSW_REG_SPMLR_LEN, -}; +MLXSW_REG_DEFINE(spmlr, MLXSW_REG_SPMLR_ID, MLXSW_REG_SPMLR_LEN); /* reg_spmlr_local_port * Local port number. @@ -1544,10 +1498,7 @@ static inline void mlxsw_reg_spmlr_pack(char *payload, u8 local_port, #define MLXSW_REG_SVFA_ID 0x201C #define MLXSW_REG_SVFA_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_svfa = { - .id = MLXSW_REG_SVFA_ID, - .len = MLXSW_REG_SVFA_LEN, -}; +MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN); /* reg_svfa_swid * Switch partition ID. @@ -1636,10 +1587,7 @@ static inline void mlxsw_reg_svfa_pack(char *payload, u8 local_port, #define MLXSW_REG_SVPE_ID 0x201E #define MLXSW_REG_SVPE_LEN 0x4 -static const struct mlxsw_reg_info mlxsw_reg_svpe = { - .id = MLXSW_REG_SVPE_ID, - .len = MLXSW_REG_SVPE_LEN, -}; +MLXSW_REG_DEFINE(svpe, MLXSW_REG_SVPE_ID, MLXSW_REG_SVPE_LEN); /* reg_svpe_local_port * Local port number @@ -1672,10 +1620,7 @@ static inline void mlxsw_reg_svpe_pack(char *payload, u8 local_port, #define MLXSW_REG_SFMR_ID 0x201F #define MLXSW_REG_SFMR_LEN 0x18 -static const struct mlxsw_reg_info mlxsw_reg_sfmr = { - .id = MLXSW_REG_SFMR_ID, - .len = MLXSW_REG_SFMR_LEN, -}; +MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN); enum mlxsw_reg_sfmr_op { MLXSW_REG_SFMR_OP_CREATE_FID, @@ -1762,10 +1707,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload, MLXSW_REG_SPVMLR_REC_LEN * \ MLXSW_REG_SPVMLR_REC_MAX_COUNT) -static const struct mlxsw_reg_info mlxsw_reg_spvmlr = { - .id = MLXSW_REG_SPVMLR_ID, - .len = MLXSW_REG_SPVMLR_LEN, -}; +MLXSW_REG_DEFINE(spvmlr, MLXSW_REG_SPVMLR_ID, MLXSW_REG_SPVMLR_LEN); /* reg_spvmlr_local_port * Local ingress port. @@ -1823,10 +1765,7 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, #define MLXSW_REG_QTCT_ID 0x400A #define MLXSW_REG_QTCT_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_qtct = { - .id = MLXSW_REG_QTCT_ID, - .len = MLXSW_REG_QTCT_LEN, -}; +MLXSW_REG_DEFINE(qtct, MLXSW_REG_QTCT_ID, MLXSW_REG_QTCT_LEN); /* reg_qtct_local_port * Local port number. @@ -1875,10 +1814,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, #define MLXSW_REG_QEEC_ID 0x400D #define MLXSW_REG_QEEC_LEN 0x1C -static const struct mlxsw_reg_info mlxsw_reg_qeec = { - .id = MLXSW_REG_QEEC_ID, - .len = MLXSW_REG_QEEC_LEN, -}; +MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); /* reg_qeec_local_port * Local port number. @@ -2000,10 +1936,7 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, #define MLXSW_REG_PMLP_ID 0x5002 #define MLXSW_REG_PMLP_LEN 0x40 -static const struct mlxsw_reg_info mlxsw_reg_pmlp = { - .id = MLXSW_REG_PMLP_ID, - .len = MLXSW_REG_PMLP_LEN, -}; +MLXSW_REG_DEFINE(pmlp, MLXSW_REG_PMLP_ID, MLXSW_REG_PMLP_LEN); /* reg_pmlp_rxtx * 0 - Tx value is used for both Tx and Rx. @@ -2059,10 +1992,7 @@ static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port) #define MLXSW_REG_PMTU_ID 0x5003 #define MLXSW_REG_PMTU_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_pmtu = { - .id = MLXSW_REG_PMTU_ID, - .len = MLXSW_REG_PMTU_LEN, -}; +MLXSW_REG_DEFINE(pmtu, MLXSW_REG_PMTU_ID, MLXSW_REG_PMTU_LEN); /* reg_pmtu_local_port * Local port number. @@ -2116,10 +2046,7 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port, #define MLXSW_REG_PTYS_ID 0x5004 #define MLXSW_REG_PTYS_LEN 0x40 -static const struct mlxsw_reg_info mlxsw_reg_ptys = { - .id = MLXSW_REG_PTYS_ID, - .len = MLXSW_REG_PTYS_LEN, -}; +MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN); /* reg_ptys_local_port * Local port number. @@ -2127,6 +2054,7 @@ static const struct mlxsw_reg_info mlxsw_reg_ptys = { */ MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8); +#define MLXSW_REG_PTYS_PROTO_MASK_IB BIT(0) #define MLXSW_REG_PTYS_PROTO_MASK_ETH BIT(2) /* reg_ptys_proto_mask @@ -2185,18 +2113,61 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); */ MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32); +/* reg_ptys_ib_link_width_cap + * IB port supported widths. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16); + +#define MLXSW_REG_PTYS_IB_SPEED_SDR BIT(0) +#define MLXSW_REG_PTYS_IB_SPEED_DDR BIT(1) +#define MLXSW_REG_PTYS_IB_SPEED_QDR BIT(2) +#define MLXSW_REG_PTYS_IB_SPEED_FDR10 BIT(3) +#define MLXSW_REG_PTYS_IB_SPEED_FDR BIT(4) +#define MLXSW_REG_PTYS_IB_SPEED_EDR BIT(5) + +/* reg_ptys_ib_proto_cap + * IB port supported speeds and protocols. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16); + /* reg_ptys_eth_proto_admin * Speed and protocol to set port to. * Access: RW */ MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); +/* reg_ptys_ib_link_width_admin + * IB width to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16); + +/* reg_ptys_ib_proto_admin + * IB speeds and protocols to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16); + /* reg_ptys_eth_proto_oper * The current speed and protocol configured for the port. * Access: RO */ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); +/* reg_ptys_ib_link_width_oper + * The current IB width to set port to. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16); + +/* reg_ptys_ib_proto_oper + * The current IB speed and protocol. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16); + /* reg_ptys_eth_proto_lp_advertise * The protocols that were advertised by the link partner during * autonegotiation. @@ -2204,8 +2175,8 @@ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); */ MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32); -static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, - u32 proto_admin) +static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port, + u32 proto_admin) { MLXSW_REG_ZERO(ptys, payload); mlxsw_reg_ptys_local_port_set(payload, local_port); @@ -2213,9 +2184,10 @@ static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin); } -static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap, - u32 *p_eth_proto_adm, - u32 *p_eth_proto_oper) +static inline void mlxsw_reg_ptys_eth_unpack(char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_adm, + u32 *p_eth_proto_oper) { if (p_eth_proto_cap) *p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload); @@ -2225,6 +2197,33 @@ static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap, *p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload); } +static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port, + u16 proto_admin, u16 link_width) +{ + MLXSW_REG_ZERO(ptys, payload); + mlxsw_reg_ptys_local_port_set(payload, local_port); + mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_IB); + mlxsw_reg_ptys_ib_proto_admin_set(payload, proto_admin); + mlxsw_reg_ptys_ib_link_width_admin_set(payload, link_width); +} + +static inline void mlxsw_reg_ptys_ib_unpack(char *payload, u16 *p_ib_proto_cap, + u16 *p_ib_link_width_cap, + u16 *p_ib_proto_oper, + u16 *p_ib_link_width_oper) +{ + if (p_ib_proto_cap) + *p_ib_proto_cap = mlxsw_reg_ptys_ib_proto_cap_get(payload); + if (p_ib_link_width_cap) + *p_ib_link_width_cap = + mlxsw_reg_ptys_ib_link_width_cap_get(payload); + if (p_ib_proto_oper) + *p_ib_proto_oper = mlxsw_reg_ptys_ib_proto_oper_get(payload); + if (p_ib_link_width_oper) + *p_ib_link_width_oper = + mlxsw_reg_ptys_ib_link_width_oper_get(payload); +} + /* PPAD - Port Physical Address Register * ------------------------------------- * The PPAD register configures the per port physical MAC address. @@ -2232,10 +2231,7 @@ static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap, #define MLXSW_REG_PPAD_ID 0x5005 #define MLXSW_REG_PPAD_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_ppad = { - .id = MLXSW_REG_PPAD_ID, - .len = MLXSW_REG_PPAD_LEN, -}; +MLXSW_REG_DEFINE(ppad, MLXSW_REG_PPAD_ID, MLXSW_REG_PPAD_LEN); /* reg_ppad_single_base_mac * 0: base_mac, local port should be 0 and mac[7:0] is @@ -2273,10 +2269,7 @@ static inline void mlxsw_reg_ppad_pack(char *payload, bool single_base_mac, #define MLXSW_REG_PAOS_ID 0x5006 #define MLXSW_REG_PAOS_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_paos = { - .id = MLXSW_REG_PAOS_ID, - .len = MLXSW_REG_PAOS_LEN, -}; +MLXSW_REG_DEFINE(paos, MLXSW_REG_PAOS_ID, MLXSW_REG_PAOS_LEN); /* reg_paos_swid * Switch partition ID with which to associate the port. @@ -2356,10 +2349,7 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port, #define MLXSW_REG_PFCC_ID 0x5007 #define MLXSW_REG_PFCC_LEN 0x20 -static const struct mlxsw_reg_info mlxsw_reg_pfcc = { - .id = MLXSW_REG_PFCC_ID, - .len = MLXSW_REG_PFCC_LEN, -}; +MLXSW_REG_DEFINE(pfcc, MLXSW_REG_PFCC_ID, MLXSW_REG_PFCC_LEN); /* reg_pfcc_local_port * Local port number. @@ -2495,10 +2485,7 @@ static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) #define MLXSW_REG_PPCNT_ID 0x5008 #define MLXSW_REG_PPCNT_LEN 0x100 -static const struct mlxsw_reg_info mlxsw_reg_ppcnt = { - .id = MLXSW_REG_PPCNT_ID, - .len = MLXSW_REG_PPCNT_LEN, -}; +MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN); /* reg_ppcnt_swid * For HCA: must be always 0. @@ -2761,6 +2748,27 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc); } +/* PLIB - Port Local to InfiniBand Port + * ------------------------------------ + * The PLIB register performs mapping from Local Port into InfiniBand Port. + */ +#define MLXSW_REG_PLIB_ID 0x500A +#define MLXSW_REG_PLIB_LEN 0x10 + +MLXSW_REG_DEFINE(plib, MLXSW_REG_PLIB_ID, MLXSW_REG_PLIB_LEN); + +/* reg_plib_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, plib, local_port, 0x00, 16, 8); + +/* reg_plib_ib_port + * InfiniBand port remapping for local_port. + * Access: RW + */ +MLXSW_ITEM32(reg, plib, ib_port, 0x00, 0, 8); + /* PPTB - Port Prio To Buffer Register * ----------------------------------- * Configures the switch priority to buffer table. @@ -2768,10 +2776,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, #define MLXSW_REG_PPTB_ID 0x500B #define MLXSW_REG_PPTB_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_pptb = { - .id = MLXSW_REG_PPTB_ID, - .len = MLXSW_REG_PPTB_LEN, -}; +MLXSW_REG_DEFINE(pptb, MLXSW_REG_PPTB_ID, MLXSW_REG_PPTB_LEN); enum { MLXSW_REG_PPTB_MM_UM, @@ -2865,10 +2870,7 @@ static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio, #define MLXSW_REG_PBMC_ID 0x500C #define MLXSW_REG_PBMC_LEN 0x6C -static const struct mlxsw_reg_info mlxsw_reg_pbmc = { - .id = MLXSW_REG_PBMC_ID, - .len = MLXSW_REG_PBMC_LEN, -}; +MLXSW_REG_DEFINE(pbmc, MLXSW_REG_PBMC_ID, MLXSW_REG_PBMC_LEN); /* reg_pbmc_local_port * Local port number. @@ -2978,10 +2980,7 @@ static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload, #define MLXSW_REG_PSPA_ID 0x500D #define MLXSW_REG_PSPA_LEN 0x8 -static const struct mlxsw_reg_info mlxsw_reg_pspa = { - .id = MLXSW_REG_PSPA_ID, - .len = MLXSW_REG_PSPA_LEN, -}; +MLXSW_REG_DEFINE(pspa, MLXSW_REG_PSPA_ID, MLXSW_REG_PSPA_LEN); /* reg_pspa_swid * Switch partition ID. @@ -3017,10 +3016,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) #define MLXSW_REG_HTGT_ID 0x7002 #define MLXSW_REG_HTGT_LEN 0x100 -static const struct mlxsw_reg_info mlxsw_reg_htgt = { - .id = MLXSW_REG_HTGT_ID, - .len = MLXSW_REG_HTGT_LEN, -}; +MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN); /* reg_htgt_swid * Switch partition ID. @@ -3154,10 +3150,7 @@ static inline void mlxsw_reg_htgt_pack(char *payload, #define MLXSW_REG_HPKT_ID 0x7003 #define MLXSW_REG_HPKT_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_hpkt = { - .id = MLXSW_REG_HPKT_ID, - .len = MLXSW_REG_HPKT_LEN, -}; +MLXSW_REG_DEFINE(hpkt, MLXSW_REG_HPKT_ID, MLXSW_REG_HPKT_LEN); enum { MLXSW_REG_HPKT_ACK_NOT_REQUIRED, @@ -3256,10 +3249,7 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) #define MLXSW_REG_RGCR_ID 0x8001 #define MLXSW_REG_RGCR_LEN 0x28 -static const struct mlxsw_reg_info mlxsw_reg_rgcr = { - .id = MLXSW_REG_RGCR_ID, - .len = MLXSW_REG_RGCR_LEN, -}; +MLXSW_REG_DEFINE(rgcr, MLXSW_REG_RGCR_ID, MLXSW_REG_RGCR_LEN); /* reg_rgcr_ipv4_en * IPv4 router enable. @@ -3330,10 +3320,7 @@ static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) #define MLXSW_REG_RITR_ID 0x8002 #define MLXSW_REG_RITR_LEN 0x40 -static const struct mlxsw_reg_info mlxsw_reg_ritr = { - .id = MLXSW_REG_RITR_ID, - .len = MLXSW_REG_RITR_LEN, -}; +MLXSW_REG_DEFINE(ritr, MLXSW_REG_RITR_ID, MLXSW_REG_RITR_LEN); /* reg_ritr_enable * Enables routing on the router interface. @@ -3533,10 +3520,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, #define MLXSW_REG_RATR_ID 0x8008 #define MLXSW_REG_RATR_LEN 0x2C -static const struct mlxsw_reg_info mlxsw_reg_ratr = { - .id = MLXSW_REG_RATR_ID, - .len = MLXSW_REG_RATR_LEN, -}; +MLXSW_REG_DEFINE(ratr, MLXSW_REG_RATR_ID, MLXSW_REG_RATR_LEN); enum mlxsw_reg_ratr_op { /* Read */ @@ -3663,10 +3647,7 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, #define MLXSW_REG_RALTA_ID 0x8010 #define MLXSW_REG_RALTA_LEN 0x04 -static const struct mlxsw_reg_info mlxsw_reg_ralta = { - .id = MLXSW_REG_RALTA_ID, - .len = MLXSW_REG_RALTA_LEN, -}; +MLXSW_REG_DEFINE(ralta, MLXSW_REG_RALTA_ID, MLXSW_REG_RALTA_LEN); /* reg_ralta_op * opcode (valid for Write, must be 0 on Read) @@ -3718,10 +3699,7 @@ static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc, #define MLXSW_REG_RALST_ID 0x8011 #define MLXSW_REG_RALST_LEN 0x104 -static const struct mlxsw_reg_info mlxsw_reg_ralst = { - .id = MLXSW_REG_RALST_ID, - .len = MLXSW_REG_RALST_LEN, -}; +MLXSW_REG_DEFINE(ralst, MLXSW_REG_RALST_ID, MLXSW_REG_RALST_LEN); /* reg_ralst_root_bin * The bin number of the root bin. @@ -3788,10 +3766,7 @@ static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number, #define MLXSW_REG_RALTB_ID 0x8012 #define MLXSW_REG_RALTB_LEN 0x04 -static const struct mlxsw_reg_info mlxsw_reg_raltb = { - .id = MLXSW_REG_RALTB_ID, - .len = MLXSW_REG_RALTB_LEN, -}; +MLXSW_REG_DEFINE(raltb, MLXSW_REG_RALTB_ID, MLXSW_REG_RALTB_LEN); /* reg_raltb_virtual_router * Virtual Router ID @@ -3832,10 +3807,7 @@ static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router, #define MLXSW_REG_RALUE_ID 0x8013 #define MLXSW_REG_RALUE_LEN 0x38 -static const struct mlxsw_reg_info mlxsw_reg_ralue = { - .id = MLXSW_REG_RALUE_ID, - .len = MLXSW_REG_RALUE_LEN, -}; +MLXSW_REG_DEFINE(ralue, MLXSW_REG_RALUE_ID, MLXSW_REG_RALUE_LEN); /* reg_ralue_protocol * Protocol. @@ -4095,10 +4067,7 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload) #define MLXSW_REG_RAUHT_ID 0x8014 #define MLXSW_REG_RAUHT_LEN 0x74 -static const struct mlxsw_reg_info mlxsw_reg_rauht = { - .id = MLXSW_REG_RAUHT_ID, - .len = MLXSW_REG_RAUHT_LEN, -}; +MLXSW_REG_DEFINE(rauht, MLXSW_REG_RAUHT_ID, MLXSW_REG_RAUHT_LEN); enum mlxsw_reg_rauht_type { MLXSW_REG_RAUHT_TYPE_IPV4, @@ -4234,10 +4203,7 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, #define MLXSW_REG_RALEU_ID 0x8015 #define MLXSW_REG_RALEU_LEN 0x28 -static const struct mlxsw_reg_info mlxsw_reg_raleu = { - .id = MLXSW_REG_RALEU_ID, - .len = MLXSW_REG_RALEU_LEN, -}; +MLXSW_REG_DEFINE(raleu, MLXSW_REG_RALEU_ID, MLXSW_REG_RALEU_LEN); /* reg_raleu_protocol * Protocol. @@ -4309,10 +4275,7 @@ static inline void mlxsw_reg_raleu_pack(char *payload, MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) #define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 -static const struct mlxsw_reg_info mlxsw_reg_rauhtd = { - .id = MLXSW_REG_RAUHTD_ID, - .len = MLXSW_REG_RAUHTD_LEN, -}; +MLXSW_REG_DEFINE(rauhtd, MLXSW_REG_RAUHTD_ID, MLXSW_REG_RAUHTD_LEN); #define MLXSW_REG_RAUHTD_FILTER_A BIT(0) #define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) @@ -4444,10 +4407,7 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, #define MLXSW_REG_MFCR_ID 0x9001 #define MLXSW_REG_MFCR_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_mfcr = { - .id = MLXSW_REG_MFCR_ID, - .len = MLXSW_REG_MFCR_LEN, -}; +MLXSW_REG_DEFINE(mfcr, MLXSW_REG_MFCR_ID, MLXSW_REG_MFCR_LEN); enum mlxsw_reg_mfcr_pwm_frequency { MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, @@ -4464,7 +4424,7 @@ enum mlxsw_reg_mfcr_pwm_frequency { * Controls the frequency of the PWM signal. * Access: RW */ -MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7); #define MLXSW_MFCR_TACHOS_MAX 10 @@ -4507,10 +4467,7 @@ mlxsw_reg_mfcr_unpack(char *payload, #define MLXSW_REG_MFSC_ID 0x9002 #define MLXSW_REG_MFSC_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_mfsc = { - .id = MLXSW_REG_MFSC_ID, - .len = MLXSW_REG_MFSC_LEN, -}; +MLXSW_REG_DEFINE(mfsc, MLXSW_REG_MFSC_ID, MLXSW_REG_MFSC_LEN); /* reg_mfsc_pwm * Fan pwm to control / monitor. @@ -4541,10 +4498,7 @@ static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, #define MLXSW_REG_MFSM_ID 0x9003 #define MLXSW_REG_MFSM_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_mfsm = { - .id = MLXSW_REG_MFSM_ID, - .len = MLXSW_REG_MFSM_LEN, -}; +MLXSW_REG_DEFINE(mfsm, MLXSW_REG_MFSM_ID, MLXSW_REG_MFSM_LEN); /* reg_mfsm_tacho * Fan tachometer index. @@ -4572,10 +4526,7 @@ static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) #define MLXSW_REG_MTCAP_ID 0x9009 #define MLXSW_REG_MTCAP_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_mtcap = { - .id = MLXSW_REG_MTCAP_ID, - .len = MLXSW_REG_MTCAP_LEN, -}; +MLXSW_REG_DEFINE(mtcap, MLXSW_REG_MTCAP_ID, MLXSW_REG_MTCAP_LEN); /* reg_mtcap_sensor_count * Number of sensors supported by the device. @@ -4593,10 +4544,7 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); #define MLXSW_REG_MTMP_ID 0x900A #define MLXSW_REG_MTMP_LEN 0x20 -static const struct mlxsw_reg_info mlxsw_reg_mtmp = { - .id = MLXSW_REG_MTMP_ID, - .len = MLXSW_REG_MTMP_LEN, -}; +MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); /* reg_mtmp_sensor_index * Sensors index to access. @@ -4679,10 +4627,7 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, #define MLXSW_REG_MPAT_ID 0x901A #define MLXSW_REG_MPAT_LEN 0x78 -static const struct mlxsw_reg_info mlxsw_reg_mpat = { - .id = MLXSW_REG_MPAT_ID, - .len = MLXSW_REG_MPAT_LEN, -}; +MLXSW_REG_DEFINE(mpat, MLXSW_REG_MPAT_ID, MLXSW_REG_MPAT_LEN); /* reg_mpat_pa_id * Port Analyzer ID. @@ -4742,10 +4687,7 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, #define MLXSW_REG_MPAR_ID 0x901B #define MLXSW_REG_MPAR_LEN 0x08 -static const struct mlxsw_reg_info mlxsw_reg_mpar = { - .id = MLXSW_REG_MPAR_ID, - .len = MLXSW_REG_MPAR_LEN, -}; +MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN); /* reg_mpar_local_port * The local port to mirror the packets from. @@ -4795,10 +4737,7 @@ static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port, #define MLXSW_REG_MLCR_ID 0x902B #define MLXSW_REG_MLCR_LEN 0x0C -static const struct mlxsw_reg_info mlxsw_reg_mlcr = { - .id = MLXSW_REG_MLCR_ID, - .len = MLXSW_REG_MLCR_LEN, -}; +MLXSW_REG_DEFINE(mlcr, MLXSW_REG_MLCR_ID, MLXSW_REG_MLCR_LEN); /* reg_mlcr_local_port * Local port number. @@ -4839,10 +4778,7 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, #define MLXSW_REG_SBPR_ID 0xB001 #define MLXSW_REG_SBPR_LEN 0x14 -static const struct mlxsw_reg_info mlxsw_reg_sbpr = { - .id = MLXSW_REG_SBPR_ID, - .len = MLXSW_REG_SBPR_LEN, -}; +MLXSW_REG_DEFINE(sbpr, MLXSW_REG_SBPR_ID, MLXSW_REG_SBPR_LEN); /* shared direstion enum for SBPR, SBCM, SBPM */ enum mlxsw_reg_sbxx_dir { @@ -4899,10 +4835,7 @@ static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, #define MLXSW_REG_SBCM_ID 0xB002 #define MLXSW_REG_SBCM_LEN 0x28 -static const struct mlxsw_reg_info mlxsw_reg_sbcm = { - .id = MLXSW_REG_SBCM_ID, - .len = MLXSW_REG_SBCM_LEN, -}; +MLXSW_REG_DEFINE(sbcm, MLXSW_REG_SBCM_ID, MLXSW_REG_SBCM_LEN); /* reg_sbcm_local_port * Local port number. @@ -4979,10 +4912,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, #define MLXSW_REG_SBPM_ID 0xB003 #define MLXSW_REG_SBPM_LEN 0x28 -static const struct mlxsw_reg_info mlxsw_reg_sbpm = { - .id = MLXSW_REG_SBPM_ID, - .len = MLXSW_REG_SBPM_LEN, -}; +MLXSW_REG_DEFINE(sbpm, MLXSW_REG_SBPM_ID, MLXSW_REG_SBPM_LEN); /* reg_sbpm_local_port * Local port number. @@ -5073,10 +5003,7 @@ static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy, #define MLXSW_REG_SBMM_ID 0xB004 #define MLXSW_REG_SBMM_LEN 0x28 -static const struct mlxsw_reg_info mlxsw_reg_sbmm = { - .id = MLXSW_REG_SBMM_ID, - .len = MLXSW_REG_SBMM_LEN, -}; +MLXSW_REG_DEFINE(sbmm, MLXSW_REG_SBMM_ID, MLXSW_REG_SBMM_LEN); /* reg_sbmm_prio * Switch Priority. @@ -5135,10 +5062,7 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, MLXSW_REG_SBSR_REC_LEN * \ MLXSW_REG_SBSR_REC_MAX_COUNT) -static const struct mlxsw_reg_info mlxsw_reg_sbsr = { - .id = MLXSW_REG_SBSR_ID, - .len = MLXSW_REG_SBSR_LEN, -}; +MLXSW_REG_DEFINE(sbsr, MLXSW_REG_SBSR_ID, MLXSW_REG_SBSR_LEN); /* reg_sbsr_clr * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy @@ -5228,10 +5152,7 @@ static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, #define MLXSW_REG_SBIB_ID 0xB006 #define MLXSW_REG_SBIB_LEN 0x10 -static const struct mlxsw_reg_info mlxsw_reg_sbib = { - .id = MLXSW_REG_SBIB_ID, - .len = MLXSW_REG_SBIB_LEN, -}; +MLXSW_REG_DEFINE(sbib, MLXSW_REG_SBIB_ID, MLXSW_REG_SBIB_LEN); /* reg_sbib_local_port * Local port number @@ -5256,132 +5177,81 @@ static inline void mlxsw_reg_sbib_pack(char *payload, u8 local_port, mlxsw_reg_sbib_buff_size_set(payload, buff_size); } +static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { + MLXSW_REG(sgcr), + MLXSW_REG(spad), + MLXSW_REG(smid), + MLXSW_REG(sspr), + MLXSW_REG(sfdat), + MLXSW_REG(sfd), + MLXSW_REG(sfn), + MLXSW_REG(spms), + MLXSW_REG(spvid), + MLXSW_REG(spvm), + MLXSW_REG(spaft), + MLXSW_REG(sfgc), + MLXSW_REG(sftr), + MLXSW_REG(sfdf), + MLXSW_REG(sldr), + MLXSW_REG(slcr), + MLXSW_REG(slcor), + MLXSW_REG(spmlr), + MLXSW_REG(svfa), + MLXSW_REG(svpe), + MLXSW_REG(sfmr), + MLXSW_REG(spvmlr), + MLXSW_REG(qtct), + MLXSW_REG(qeec), + MLXSW_REG(pmlp), + MLXSW_REG(pmtu), + MLXSW_REG(ptys), + MLXSW_REG(ppad), + MLXSW_REG(paos), + MLXSW_REG(pfcc), + MLXSW_REG(ppcnt), + MLXSW_REG(plib), + MLXSW_REG(pptb), + MLXSW_REG(pbmc), + MLXSW_REG(pspa), + MLXSW_REG(htgt), + MLXSW_REG(hpkt), + MLXSW_REG(rgcr), + MLXSW_REG(ritr), + MLXSW_REG(ratr), + MLXSW_REG(ralta), + MLXSW_REG(ralst), + MLXSW_REG(raltb), + MLXSW_REG(ralue), + MLXSW_REG(rauht), + MLXSW_REG(raleu), + MLXSW_REG(rauhtd), + MLXSW_REG(mfcr), + MLXSW_REG(mfsc), + MLXSW_REG(mfsm), + MLXSW_REG(mtcap), + MLXSW_REG(mtmp), + MLXSW_REG(mpat), + MLXSW_REG(mpar), + MLXSW_REG(mlcr), + MLXSW_REG(sbpr), + MLXSW_REG(sbcm), + MLXSW_REG(sbpm), + MLXSW_REG(sbmm), + MLXSW_REG(sbsr), + MLXSW_REG(sbib), +}; + static inline const char *mlxsw_reg_id_str(u16 reg_id) { - switch (reg_id) { - case MLXSW_REG_SGCR_ID: - return "SGCR"; - case MLXSW_REG_SPAD_ID: - return "SPAD"; - case MLXSW_REG_SMID_ID: - return "SMID"; - case MLXSW_REG_SSPR_ID: - return "SSPR"; - case MLXSW_REG_SFDAT_ID: - return "SFDAT"; - case MLXSW_REG_SFD_ID: - return "SFD"; - case MLXSW_REG_SFN_ID: - return "SFN"; - case MLXSW_REG_SPMS_ID: - return "SPMS"; - case MLXSW_REG_SPVID_ID: - return "SPVID"; - case MLXSW_REG_SPVM_ID: - return "SPVM"; - case MLXSW_REG_SPAFT_ID: - return "SPAFT"; - case MLXSW_REG_SFGC_ID: - return "SFGC"; - case MLXSW_REG_SFTR_ID: - return "SFTR"; - case MLXSW_REG_SFDF_ID: - return "SFDF"; - case MLXSW_REG_SLDR_ID: - return "SLDR"; - case MLXSW_REG_SLCR_ID: - return "SLCR"; - case MLXSW_REG_SLCOR_ID: - return "SLCOR"; - case MLXSW_REG_SPMLR_ID: - return "SPMLR"; - case MLXSW_REG_SVFA_ID: - return "SVFA"; - case MLXSW_REG_SVPE_ID: - return "SVPE"; - case MLXSW_REG_SFMR_ID: - return "SFMR"; - case MLXSW_REG_SPVMLR_ID: - return "SPVMLR"; - case MLXSW_REG_QTCT_ID: - return "QTCT"; - case MLXSW_REG_QEEC_ID: - return "QEEC"; - case MLXSW_REG_PMLP_ID: - return "PMLP"; - case MLXSW_REG_PMTU_ID: - return "PMTU"; - case MLXSW_REG_PTYS_ID: - return "PTYS"; - case MLXSW_REG_PPAD_ID: - return "PPAD"; - case MLXSW_REG_PAOS_ID: - return "PAOS"; - case MLXSW_REG_PFCC_ID: - return "PFCC"; - case MLXSW_REG_PPCNT_ID: - return "PPCNT"; - case MLXSW_REG_PPTB_ID: - return "PPTB"; - case MLXSW_REG_PBMC_ID: - return "PBMC"; - case MLXSW_REG_PSPA_ID: - return "PSPA"; - case MLXSW_REG_HTGT_ID: - return "HTGT"; - case MLXSW_REG_HPKT_ID: - return "HPKT"; - case MLXSW_REG_RGCR_ID: - return "RGCR"; - case MLXSW_REG_RITR_ID: - return "RITR"; - case MLXSW_REG_RATR_ID: - return "RATR"; - case MLXSW_REG_RALTA_ID: - return "RALTA"; - case MLXSW_REG_RALST_ID: - return "RALST"; - case MLXSW_REG_RALTB_ID: - return "RALTB"; - case MLXSW_REG_RALUE_ID: - return "RALUE"; - case MLXSW_REG_RAUHT_ID: - return "RAUHT"; - case MLXSW_REG_RALEU_ID: - return "RALEU"; - case MLXSW_REG_RAUHTD_ID: - return "RAUHTD"; - case MLXSW_REG_MFCR_ID: - return "MFCR"; - case MLXSW_REG_MFSC_ID: - return "MFSC"; - case MLXSW_REG_MFSM_ID: - return "MFSM"; - case MLXSW_REG_MTCAP_ID: - return "MTCAP"; - case MLXSW_REG_MPAT_ID: - return "MPAT"; - case MLXSW_REG_MPAR_ID: - return "MPAR"; - case MLXSW_REG_MTMP_ID: - return "MTMP"; - case MLXSW_REG_MLCR_ID: - return "MLCR"; - case MLXSW_REG_SBPR_ID: - return "SBPR"; - case MLXSW_REG_SBCM_ID: - return "SBCM"; - case MLXSW_REG_SBPM_ID: - return "SBPM"; - case MLXSW_REG_SBMM_ID: - return "SBMM"; - case MLXSW_REG_SBSR_ID: - return "SBSR"; - case MLXSW_REG_SBIB_ID: - return "SBIB"; - default: - return "*UNKNOWN*"; + const struct mlxsw_reg_info *reg_info; + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_reg_infos); i++) { + reg_info = mlxsw_reg_infos[i]; + if (reg_info->id == reg_id) + return reg_info->name; } + return "*UNKNOWN*"; } /* PUDE - Port Up / Down Event diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h new file mode 100644 index 000000000000..a031e45c8b06 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -0,0 +1,121 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/resources.h + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_RESOURCES_H +#define _MLXSW_RESOURCES_H + +#include <linux/kernel.h> +#include <linux/types.h> + +enum mlxsw_res_id { + MLXSW_RES_ID_KVD_SIZE, + MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, + MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, + MLXSW_RES_ID_MAX_SPAN, + MLXSW_RES_ID_MAX_SYSTEM_PORT, + MLXSW_RES_ID_MAX_LAG, + MLXSW_RES_ID_MAX_LAG_MEMBERS, + MLXSW_RES_ID_MAX_VRS, + MLXSW_RES_ID_MAX_RIFS, + + /* Internal resources. + * Determined by the SW, not queried from the HW. + */ + MLXSW_RES_ID_KVD_SINGLE_SIZE, + MLXSW_RES_ID_KVD_DOUBLE_SIZE, + MLXSW_RES_ID_KVD_LINEAR_SIZE, + + __MLXSW_RES_ID_MAX, +}; + +static u16 mlxsw_res_ids[] = { + [MLXSW_RES_ID_KVD_SIZE] = 0x1001, + [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, + [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, + [MLXSW_RES_ID_MAX_SPAN] = 0x2420, + [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502, + [MLXSW_RES_ID_MAX_LAG] = 0x2520, + [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, + [MLXSW_RES_ID_MAX_VRS] = 0x2C01, + [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, +}; + +struct mlxsw_res { + bool valid[__MLXSW_RES_ID_MAX]; + u64 values[__MLXSW_RES_ID_MAX]; +}; + +static inline bool mlxsw_res_valid(struct mlxsw_res *res, + enum mlxsw_res_id res_id) +{ + return res->valid[res_id]; +} + +#define MLXSW_RES_VALID(res, short_res_id) \ + mlxsw_res_valid(res, MLXSW_RES_ID_##short_res_id) + +static inline u64 mlxsw_res_get(struct mlxsw_res *res, + enum mlxsw_res_id res_id) +{ + if (WARN_ON(!res->valid[res_id])) + return 0; + return res->values[res_id]; +} + +#define MLXSW_RES_GET(res, short_res_id) \ + mlxsw_res_get(res, MLXSW_RES_ID_##short_res_id) + +static inline void mlxsw_res_set(struct mlxsw_res *res, + enum mlxsw_res_id res_id, u64 value) +{ + res->valid[res_id] = true; + res->values[res_id] = value; +} + +#define MLXSW_RES_SET(res, short_res_id, value) \ + mlxsw_res_set(res, MLXSW_RES_ID_##short_res_id, value) + +static inline void mlxsw_res_parse(struct mlxsw_res *res, u16 id, u64 value) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_res_ids); i++) { + if (mlxsw_res_ids[i] == id) { + mlxsw_res_set(res, i, value); + return; + } + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dda5761e91bc..4a1f9d5f7c03 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -37,6 +37,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/pci.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -53,12 +54,12 @@ #include <linux/dcbnl.h> #include <linux/inetdevice.h> #include <net/switchdev.h> -#include <generated/utsrelease.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include "spectrum.h" +#include "pci.h" #include "core.h" #include "reg.h" #include "port.h" @@ -156,7 +157,7 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) { - char spad_pl[MLXSW_REG_SPAD_LEN]; + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; int err; err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl); @@ -168,14 +169,13 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!resources->max_span_valid) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) return -EIO; - mlxsw_sp->span.entries_count = resources->max_span; + mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_SPAN); mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, sizeof(struct mlxsw_sp_span_entry), GFP_KERNEL); @@ -1239,8 +1239,10 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, tcf_exts_to_list(cls->exts, &actions); list_for_each_entry(a, &actions, list) { - if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) + if (!is_tcf_mirred_egress_mirror(a) || + protocol != htons(ETH_P_ALL)) { return -ENOTSUPP; + } err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls, a, ingress); @@ -1413,7 +1415,7 @@ err_port_pause_configure: struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; - u64 (*getter)(char *payload); + u64 (*getter)(const char *payload); }; static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { @@ -1534,7 +1536,7 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { #define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) -static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(char *ppcnt_pl) +static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(const char *ppcnt_pl) { u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); @@ -2002,12 +2004,12 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, int err; autoneg = mlxsw_sp_port->link.autoneg; - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, - ð_proto_oper); + mlxsw_reg_ptys_eth_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, + ð_proto_oper); mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd); @@ -2036,11 +2038,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, bool autoneg; int err; - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, NULL, NULL); + mlxsw_reg_ptys_eth_unpack(ptys_pl, ð_proto_cap, NULL, NULL); autoneg = cmd->base.autoneg == AUTONEG_ENABLE; eth_proto_new = autoneg ? @@ -2053,7 +2055,8 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return -EINVAL; } - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, + eth_proto_new); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; @@ -2091,8 +2094,8 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) u32 eth_proto_admin; eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed); - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, - eth_proto_admin); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, + eth_proto_admin); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } @@ -2210,8 +2213,8 @@ static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); } -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) +static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width, u8 lane) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -2221,6 +2224,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); if (!dev) return -ENOMEM; + SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); mlxsw_sp_port = netdev_priv(dev); mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; @@ -2284,6 +2288,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; + /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ @@ -2352,20 +2359,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_register_netdev; } - err = mlxsw_core_port_init(mlxsw_sp->core, &mlxsw_sp_port->core_port, - mlxsw_sp_port->local_port, dev, - mlxsw_sp_port->split, module); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", - mlxsw_sp_port->local_port); - goto err_core_port_init; - } - + mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, + mlxsw_sp_port, dev, mlxsw_sp_port->split, + module); mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); return 0; -err_core_port_init: - unregister_netdev(dev); err_register_netdev: mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); @@ -2394,14 +2393,34 @@ err_port_active_vlans_alloc: return err; } -static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width, u8 lane) +{ + int err; + + err = mlxsw_core_port_init(mlxsw_sp->core, local_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + err = __mlxsw_sp_port_create(mlxsw_sp, local_port, false, + module, width, lane); + if (err) + goto err_port_create; + return 0; + +err_port_create: + mlxsw_core_port_fini(mlxsw_sp->core, local_port); + return err; +} + +static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - if (!mlxsw_sp_port) - return; cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); - mlxsw_core_port_fini(&mlxsw_sp_port->core_port); + mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); @@ -2417,12 +2436,24 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) free_netdev(mlxsw_sp_port->dev); } +static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + __mlxsw_sp_port_remove(mlxsw_sp, local_port); + mlxsw_core_port_fini(mlxsw_sp->core, local_port); +} + +static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + return mlxsw_sp->ports[local_port] != NULL; +} + static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) { int i; for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) - mlxsw_sp_port_remove(mlxsw_sp, i); + if (mlxsw_sp_port_created(mlxsw_sp, i)) + mlxsw_sp_port_remove(mlxsw_sp, i); kfree(mlxsw_sp->ports); } @@ -2446,8 +2477,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width, - lane); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, + module, width, lane); if (err) goto err_port_create; } @@ -2456,7 +2487,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) err_port_create: err_port_module_info_get: for (i--; i >= 1; i--) - mlxsw_sp_port_remove(mlxsw_sp, i); + if (mlxsw_sp_port_created(mlxsw_sp, i)) + mlxsw_sp_port_remove(mlxsw_sp, i); kfree(mlxsw_sp->ports); return err; } @@ -2498,7 +2530,8 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, err_port_create: for (i--; i >= 0; i--) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); i = count; err_port_swid_set: for (i--; i >= 0; i--) @@ -2588,7 +2621,8 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, } for (i = 0; i < count; i++) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count); if (err) { @@ -2633,7 +2667,8 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) base_port = base_port + 2; for (i = 0; i < count; i++) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); @@ -2889,7 +2924,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; char slcr_pl[MLXSW_REG_SLCR_LEN]; int err; @@ -2906,11 +2940,11 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) if (err) return err; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid)) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG_MEMBERS)) return -EIO; - mlxsw_sp->lags = kcalloc(resources->max_lag, + mlxsw_sp->lags = kcalloc(MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG), sizeof(struct mlxsw_sp_upper), GFP_KERNEL); if (!mlxsw_sp->lags) @@ -3065,8 +3099,7 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { }; static struct mlxsw_driver mlxsw_sp_driver = { - .kind = MLXSW_DEVICE_KIND_SPECTRUM, - .owner = THIS_MODULE, + .kind = mlxsw_sp_driver_name, .priv_size = sizeof(struct mlxsw_sp), .init = mlxsw_sp_init, .fini = mlxsw_sp_fini, @@ -3092,19 +3125,30 @@ static bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } +static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) +{ + struct mlxsw_sp_port **port = data; + int ret = 0; + + if (mlxsw_sp_port_dev_check(lower_dev)) { + *port = netdev_priv(lower_dev); + ret = 1; + } + + return ret; +} + static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct net_device *lower_dev; - struct list_head *iter; + struct mlxsw_sp_port *port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - netdev_for_each_all_lower_dev(dev, lower_dev, iter) { - if (mlxsw_sp_port_dev_check(lower_dev)) - return netdev_priv(lower_dev); - } - return NULL; + port = NULL; + netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port); + + return port; } static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) @@ -3117,17 +3161,15 @@ static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct net_device *lower_dev; - struct list_head *iter; + struct mlxsw_sp_port *port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { - if (mlxsw_sp_port_dev_check(lower_dev)) - return netdev_priv(lower_dev); - } - return NULL; + port = NULL; + netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port); + + return port; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) @@ -3171,11 +3213,9 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_rif; i++) + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) if (!mlxsw_sp->rifs[i]) return i; @@ -3698,14 +3738,15 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u16 lag_id = mlxsw_sp_port->lag_id; - struct mlxsw_resources *resources; + u64 max_lag_members; int i, count = 0; if (!mlxsw_sp_port->lagged) return true; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_ports_in_lag; i++) { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + for (i = 0; i < max_lag_members; i++) { struct mlxsw_sp_port *lag_port; lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); @@ -3911,13 +3952,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, u16 *p_lag_id) { - struct mlxsw_resources *resources; struct mlxsw_sp_upper *lag; int free_lag_id = -1; + u64 max_lag; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_lag; i++) { + max_lag = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG); + for (i = 0; i < max_lag; i++) { lag = mlxsw_sp_lag_get(mlxsw_sp, i); if (lag->ref_count) { if (lag->dev == lag_dev) { @@ -3951,11 +3992,12 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 *p_port_index) { - struct mlxsw_resources *resources; + u64 max_lag_members; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_ports_in_lag; i++) { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + for (i = 0; i < max_lag_members; i++) { if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { *p_port_index = i; return 0; @@ -4652,6 +4694,16 @@ static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; +static const struct pci_device_id mlxsw_sp_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp_pci_driver = { + .name = mlxsw_sp_driver_name, + .id_table = mlxsw_sp_pci_id_table, +}; + static int __init mlxsw_sp_module_init(void) { int err; @@ -4663,8 +4715,15 @@ static int __init mlxsw_sp_module_init(void) err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) goto err_core_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sp_pci_driver); + if (err) + goto err_pci_driver_register; + return 0; +err_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); @@ -4674,6 +4733,7 @@ err_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { + mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); @@ -4686,4 +4746,4 @@ module_exit(mlxsw_sp_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Spectrum driver"); -MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SPECTRUM); +MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 97bbc1d21df8..cc1af19d699a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -316,7 +316,6 @@ struct mlxsw_sp_port_pcpu_stats { }; struct mlxsw_sp_port { - struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sp *mlxsw_sp; @@ -479,12 +478,9 @@ static inline struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { - struct mlxsw_resources *resources; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - - for (i = 0; i < resources->max_rif; i++) + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) return mlxsw_sp->rifs[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e83072da6272..164bd309e92b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -382,12 +382,10 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_virtual_routers; i++) { + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) return vr; @@ -429,14 +427,12 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, enum mlxsw_sp_l3proto proto) { - struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_virtual_routers; i++) { + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; if (vr->used && vr->proto == proto && vr->tb_id == tb_id) return vr; @@ -572,21 +568,20 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; + u64 max_vrs; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!resources->max_virtual_routers_valid) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS)) return -EIO; - mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers, - sizeof(struct mlxsw_sp_vr), + max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); + mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr), GFP_KERNEL); if (!mlxsw_sp->router.vrs) return -ENOMEM; - for (i = 0; i < resources->max_virtual_routers; i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router.vrs[i]; vr->id = i; } @@ -1872,14 +1867,15 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_entry *tmp; struct mlxsw_sp_vr *vr; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_virtual_routers; i++) { + if (mlxsw_sp->router.aborted) + return; + dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n"); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) @@ -1912,21 +1908,21 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + u64 max_rifs; int err; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - if (!resources->max_rif_valid) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; - mlxsw_sp->rifs = kcalloc(resources->max_rif, - sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), + GFP_KERNEL); if (!mlxsw_sp->rifs) return -ENOMEM; mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) goto err_rgcr_fail; @@ -1940,15 +1936,13 @@ err_rgcr_fail: static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; int i; mlxsw_reg_rgcr_pack(rgcr_pl, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_rif; i++) + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) WARN_ON_ONCE(mlxsw_sp->rifs[i]); kfree(mlxsw_sp->rifs); @@ -1996,7 +1990,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_vrs_init; - err = mlxsw_sp_neigh_init(mlxsw_sp); + err = mlxsw_sp_neigh_init(mlxsw_sp); if (err) goto err_neigh_init; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 1e2c8eca3af1..b87ba7d36bc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1196,11 +1196,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_resources *resources; + u64 max_lag_members; int i; - resources = mlxsw_core_resources_get(mlxsw_sp->core); - for (i = 0; i < resources->max_ports_in_lag; i++) { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + for (i = 0; i < max_lag_members; i++) { mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); if (mlxsw_sp_port) return mlxsw_sp_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c new file mode 100644 index 000000000000..1552594b2d1f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -0,0 +1,597 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/switchib.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <net/switchdev.h> + +#include "pci.h" +#include "core.h" +#include "reg.h" +#include "port.h" +#include "trap.h" +#include "txheader.h" +#include "ib.h" + +static const char mlxsw_sib_driver_name[] = "mlxsw_switchib"; +static const char mlxsw_sib2_driver_name[] = "mlxsw_switchib2"; + +struct mlxsw_sib_port; + +struct mlxsw_sib { + struct mlxsw_sib_port **ports; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; +}; + +struct mlxsw_sib_port { + struct mlxsw_sib *mlxsw_sib; + u8 local_port; + struct { + u8 module; + } mapping; +}; + +/* tx_v1_hdr_version + * Tx header version. + * Must be set to 1. + */ +MLXSW_ITEM32(tx_v1, hdr, version, 0x00, 28, 4); + +/* tx_v1_hdr_ctl + * Packet control type. + * 0 - Ethernet control (e.g. EMADs, LACP) + * 1 - Ethernet data + */ +MLXSW_ITEM32(tx_v1, hdr, ctl, 0x00, 26, 2); + +/* tx_v1_hdr_proto + * Packet protocol type. Must be set to 1 (Ethernet). + */ +MLXSW_ITEM32(tx_v1, hdr, proto, 0x00, 21, 3); + +/* tx_v1_hdr_swid + * Switch partition ID. Must be set to 0. + */ +MLXSW_ITEM32(tx_v1, hdr, swid, 0x00, 12, 3); + +/* tx_v1_hdr_control_tclass + * Indicates if the packet should use the control TClass and not one + * of the data TClasses. + */ +MLXSW_ITEM32(tx_v1, hdr, control_tclass, 0x00, 6, 1); + +/* tx_v1_hdr_port_mid + * Destination local port for unicast packets. + * Destination multicast ID for multicast packets. + * + * Control packets are directed to a specific egress port, while data + * packets are transmitted through the CPU port (0) into the switch partition, + * where forwarding rules are applied. + */ +MLXSW_ITEM32(tx_v1, hdr, port_mid, 0x04, 16, 16); + +/* tx_v1_hdr_type + * 0 - Data packets + * 6 - Control packets + */ +MLXSW_ITEM32(tx_v1, hdr, type, 0x0C, 0, 4); + +static void +mlxsw_sib_tx_v1_hdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_v1_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_v1_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL); + mlxsw_tx_v1_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_v1_hdr_swid_set(txhdr, 0); + mlxsw_tx_v1_hdr_control_tclass_set(txhdr, 1); + mlxsw_tx_v1_hdr_port_mid_set(txhdr, tx_info->local_port); + mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); +} + +static int +mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port, + bool is_up) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib; + char paos_pl[MLXSW_REG_PAOS_LEN]; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sib_port->local_port, + is_up ? MLXSW_PORT_ADMIN_STATUS_UP : + MLXSW_PORT_ADMIN_STATUS_DOWN); + return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(paos), paos_pl); +} + +static int mlxsw_sib_port_mtu_set(struct mlxsw_sib_port *mlxsw_sib_port, + u16 mtu) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + int max_mtu; + int err; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl); + if (err) + return err; + max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + + if (mtu > max_mtu) + return -EINVAL; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, mtu); + return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl); +} + +static int mlxsw_sib_port_set(struct mlxsw_sib_port *mlxsw_sib_port, u8 port) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib; + char plib_pl[MLXSW_REG_PLIB_LEN] = {0}; + int err; + + mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sib_port->local_port); + mlxsw_reg_plib_ib_port_set(plib_pl, port); + err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(plib), plib_pl); + return err; +} + +static int mlxsw_sib_port_swid_set(struct mlxsw_sib_port *mlxsw_sib_port, + u8 swid) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib; + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sib_port->local_port); + return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pspa), pspa_pl); +} + +static int mlxsw_sib_port_module_info_get(struct mlxsw_sib *mlxsw_sib, + u8 local_port, u8 *p_module, + u8 *p_width) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); + return 0; +} + +static int mlxsw_sib_port_speed_set(struct mlxsw_sib_port *mlxsw_sib_port, + u16 speed, u16 width) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + + mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sib_port->local_port, speed, + width); + return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(ptys), ptys_pl); +} + +static bool mlxsw_sib_port_created(struct mlxsw_sib *mlxsw_sib, u8 local_port) +{ + return mlxsw_sib->ports[local_port] != NULL; +} + +static int __mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port, + u8 module, u8 width) +{ + struct mlxsw_sib_port *mlxsw_sib_port; + int err; + + mlxsw_sib_port = kzalloc(sizeof(*mlxsw_sib_port), GFP_KERNEL); + if (!mlxsw_sib_port) + return -ENOMEM; + mlxsw_sib_port->mlxsw_sib = mlxsw_sib; + mlxsw_sib_port->local_port = local_port; + mlxsw_sib_port->mapping.module = module; + + err = mlxsw_sib_port_swid_set(mlxsw_sib_port, 0); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sib_port->local_port); + goto err_port_swid_set; + } + + /* Expose the IB port number as it's front panel name */ + err = mlxsw_sib_port_set(mlxsw_sib_port, module + 1); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set IB port\n", + mlxsw_sib_port->local_port); + goto err_port_ib_set; + } + + /* Supports all speeds from SDR to FDR (bitmask) and support bus width + * of 1x, 2x and 4x (3 bits bitmask) + */ + err = mlxsw_sib_port_speed_set(mlxsw_sib_port, + MLXSW_REG_PTYS_IB_SPEED_EDR - 1, + BIT(3) - 1); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set speed\n", + mlxsw_sib_port->local_port); + goto err_port_speed_set; + } + + /* Change to the maximum MTU the device supports, the SMA will take + * care of the active MTU + */ + err = mlxsw_sib_port_mtu_set(mlxsw_sib_port, MLXSW_IB_DEFAULT_MTU); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set MTU\n", + mlxsw_sib_port->local_port); + goto err_port_mtu_set; + } + + err = mlxsw_sib_port_admin_status_set(mlxsw_sib_port, true); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to change admin state to UP\n", + mlxsw_sib_port->local_port); + goto err_port_admin_set; + } + + mlxsw_core_port_ib_set(mlxsw_sib->core, mlxsw_sib_port->local_port, + mlxsw_sib_port); + mlxsw_sib->ports[local_port] = mlxsw_sib_port; + return 0; + +err_port_admin_set: +err_port_mtu_set: +err_port_speed_set: +err_port_ib_set: + mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT); +err_port_swid_set: + kfree(mlxsw_sib_port); + return err; +} + +static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port, + u8 module, u8 width) +{ + int err; + + err = mlxsw_core_port_init(mlxsw_sib->core, local_port); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + err = __mlxsw_sib_port_create(mlxsw_sib, local_port, module, width); + if (err) + goto err_port_create; + + return 0; + +err_port_create: + mlxsw_core_port_fini(mlxsw_sib->core, local_port); + return err; +} + +static void __mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port) +{ + struct mlxsw_sib_port *mlxsw_sib_port = mlxsw_sib->ports[local_port]; + + mlxsw_core_port_clear(mlxsw_sib->core, local_port, mlxsw_sib); + mlxsw_sib->ports[local_port] = NULL; + mlxsw_sib_port_admin_status_set(mlxsw_sib_port, false); + mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT); + kfree(mlxsw_sib_port); +} + +static void mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port) +{ + __mlxsw_sib_port_remove(mlxsw_sib, local_port); + mlxsw_core_port_fini(mlxsw_sib->core, local_port); +} + +static void mlxsw_sib_ports_remove(struct mlxsw_sib *mlxsw_sib) +{ + int i; + + for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) + if (mlxsw_sib_port_created(mlxsw_sib, i)) + mlxsw_sib_port_remove(mlxsw_sib, i); + kfree(mlxsw_sib->ports); +} + +static int mlxsw_sib_ports_create(struct mlxsw_sib *mlxsw_sib) +{ + size_t alloc_size; + u8 module, width; + int i; + int err; + + alloc_size = sizeof(struct mlxsw_sib_port *) * MLXSW_PORT_MAX_IB_PORTS; + mlxsw_sib->ports = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_sib->ports) + return -ENOMEM; + + for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) { + err = mlxsw_sib_port_module_info_get(mlxsw_sib, i, &module, + &width); + if (err) + goto err_port_module_info_get; + if (!width) + continue; + err = mlxsw_sib_port_create(mlxsw_sib, i, module, width); + if (err) + goto err_port_create; + } + return 0; + +err_port_create: +err_port_module_info_get: + for (i--; i >= 1; i--) + if (mlxsw_sib_port_created(mlxsw_sib, i)) + mlxsw_sib_port_remove(mlxsw_sib, i); + kfree(mlxsw_sib->ports); + return err; +} + +static void +mlxsw_sib_pude_ib_event_func(struct mlxsw_sib_port *mlxsw_sib_port, + enum mlxsw_reg_pude_oper_status status) +{ + if (status == MLXSW_PORT_OPER_STATUS_UP) + pr_info("ib link for port %d - up\n", + mlxsw_sib_port->mapping.module + 1); + else + pr_info("ib link for port %d - down\n", + mlxsw_sib_port->mapping.module + 1); +} + +static void mlxsw_sib_pude_event_func(const struct mlxsw_reg_info *reg, + char *pude_pl, void *priv) +{ + struct mlxsw_sib *mlxsw_sib = priv; + struct mlxsw_sib_port *mlxsw_sib_port; + enum mlxsw_reg_pude_oper_status status; + u8 local_port; + + local_port = mlxsw_reg_pude_local_port_get(pude_pl); + mlxsw_sib_port = mlxsw_sib->ports[local_port]; + if (!mlxsw_sib_port) { + dev_warn(mlxsw_sib->bus_info->dev, "Port %d: Link event received for non-existent port\n", + local_port); + return; + } + + status = mlxsw_reg_pude_oper_status_get(pude_pl); + mlxsw_sib_pude_ib_event_func(mlxsw_sib_port, status); +} + +static struct mlxsw_event_listener mlxsw_sib_pude_event = { + .func = mlxsw_sib_pude_event_func, + .trap_id = MLXSW_TRAP_ID_PUDE, +}; + +static int mlxsw_sib_event_register(struct mlxsw_sib *mlxsw_sib, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sib_pude_event; + break; + } + err = mlxsw_core_event_listener_register(mlxsw_sib->core, el, + mlxsw_sib); + if (err) + return err; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id); + err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_event_trap_set; + + return 0; + +err_event_trap_set: + mlxsw_core_event_listener_unregister(mlxsw_sib->core, el, mlxsw_sib); + return err; +} + +static void mlxsw_sib_event_unregister(struct mlxsw_sib *mlxsw_sib, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sib_pude_event; + break; + } + mlxsw_core_event_listener_unregister(mlxsw_sib->core, el, mlxsw_sib); +} + +static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core); + int err; + + mlxsw_sib->core = mlxsw_core; + mlxsw_sib->bus_info = mlxsw_bus_info; + + err = mlxsw_sib_ports_create(mlxsw_sib); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n"); + return err; + } + + err = mlxsw_sib_event_register(mlxsw_sib, MLXSW_TRAP_ID_PUDE); + if (err) { + dev_err(mlxsw_sib->bus_info->dev, "Failed to register for PUDE events\n"); + goto err_event_register; + } + + return 0; + +err_event_register: + mlxsw_sib_ports_remove(mlxsw_sib); + return err; +} + +static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sib_event_unregister(mlxsw_sib, MLXSW_TRAP_ID_PUDE); + mlxsw_sib_ports_remove(mlxsw_sib); +} + +static struct mlxsw_config_profile mlxsw_sib_config_profile = { + .used_max_system_port = 1, + .max_system_port = 48000, + .used_max_ib_mc = 1, + .max_ib_mc = 27, + .used_max_pkey = 1, + .max_pkey = 32, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_IB, + } + }, + .resource_query_enable = 0, +}; + +static struct mlxsw_driver mlxsw_sib_driver = { + .kind = mlxsw_sib_driver_name, + .priv_size = sizeof(struct mlxsw_sib), + .init = mlxsw_sib_init, + .fini = mlxsw_sib_fini, + .txhdr_construct = mlxsw_sib_tx_v1_hdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sib_config_profile, +}; + +static struct mlxsw_driver mlxsw_sib2_driver = { + .kind = mlxsw_sib2_driver_name, + .priv_size = sizeof(struct mlxsw_sib), + .init = mlxsw_sib_init, + .fini = mlxsw_sib_fini, + .txhdr_construct = mlxsw_sib_tx_v1_hdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sib_config_profile, +}; + +static const struct pci_device_id mlxsw_sib_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sib_pci_driver = { + .name = mlxsw_sib_driver_name, + .id_table = mlxsw_sib_pci_id_table, +}; + +static const struct pci_device_id mlxsw_sib2_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB2), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sib2_pci_driver = { + .name = mlxsw_sib2_driver_name, + .id_table = mlxsw_sib2_pci_id_table, +}; + +static int __init mlxsw_sib_module_init(void) +{ + int err; + + err = mlxsw_core_driver_register(&mlxsw_sib_driver); + if (err) + return err; + + err = mlxsw_core_driver_register(&mlxsw_sib2_driver); + if (err) + goto err_sib2_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sib_pci_driver); + if (err) + goto err_sib_pci_driver_register; + + err = mlxsw_pci_driver_register(&mlxsw_sib2_pci_driver); + if (err) + goto err_sib2_pci_driver_register; + + return 0; + +err_sib2_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver); +err_sib_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sib2_driver); +err_sib2_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sib_driver); + return err; +} + +static void __exit mlxsw_sib_module_exit(void) +{ + mlxsw_pci_driver_unregister(&mlxsw_sib2_pci_driver); + mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver); + mlxsw_core_driver_unregister(&mlxsw_sib2_driver); + mlxsw_core_driver_unregister(&mlxsw_sib_driver); +} + +module_init(mlxsw_sib_module_init); +module_exit(mlxsw_sib_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Elad Raz <eladr@@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox SwitchIB and SwitchIB-2 driver"); +MODULE_ALIAS("mlxsw_switchib2"); +MODULE_DEVICE_TABLE(pci, mlxsw_sib_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sib2_pci_id_table); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 92bda8703f87..60f19fb68e5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -3,7 +3,7 @@ * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> - * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * Copyright (c) 2015-2016 Elad Raz <eladr@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/pci.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/slab.h> @@ -44,13 +45,14 @@ #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/switchdev.h> -#include <generated/utsrelease.h> +#include "pci.h" #include "core.h" #include "reg.h" #include "port.h" #include "trap.h" #include "txheader.h" +#include "ib.h" static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2"; static const char mlxsw_sx_driver_version[] = "1.0"; @@ -74,11 +76,13 @@ struct mlxsw_sx_port_pcpu_stats { }; struct mlxsw_sx_port { - struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sx *mlxsw_sx; u8 local_port; + struct { + u8 module; + } mapping; }; /* tx_hdr_version @@ -214,14 +218,14 @@ static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port, return 0; } -static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu) +static int __mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, + u16 mtu) { struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; char pmtu_pl[MLXSW_REG_PMTU_LEN]; int max_mtu; int err; - mtu += MLXSW_TXHDR_LEN + ETH_HLEN; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl); if (err) @@ -235,6 +239,32 @@ static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu) return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl); } +static int mlxsw_sx_port_mtu_eth_set(struct mlxsw_sx_port *mlxsw_sx_port, + u16 mtu) +{ + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu); +} + +static int mlxsw_sx_port_mtu_ib_set(struct mlxsw_sx_port *mlxsw_sx_port, + u16 mtu) +{ + return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu); +} + +static int mlxsw_sx_port_ib_port_set(struct mlxsw_sx_port *mlxsw_sx_port, + u8 ib_port) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char plib_pl[MLXSW_REG_PLIB_LEN] = {0}; + int err; + + mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sx_port->local_port); + mlxsw_reg_plib_ib_port_set(plib_pl, ib_port); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(plib), plib_pl); + return err; +} + static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid) { struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; @@ -254,18 +284,19 @@ mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port) return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl); } -static int mlxsw_sx_port_module_check(struct mlxsw_sx_port *mlxsw_sx_port, - bool *p_usable) +static int mlxsw_sx_port_module_info_get(struct mlxsw_sx *mlxsw_sx, + u8 local_port, u8 *p_module, + u8 *p_width) { - struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; - mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sx_port->local_port); + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl); if (err) return err; - *p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false; + *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); return 0; } @@ -343,7 +374,7 @@ static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu) struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); int err; - err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu); + err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, mtu); if (err) return err; dev->mtu = mtu; @@ -382,12 +413,26 @@ mlxsw_sx_port_get_stats64(struct net_device *dev, return stats; } +static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name, + size_t len) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + int err; + + err = snprintf(name, len, "p%d", mlxsw_sx_port->mapping.module + 1); + if (err >= len) + return -EINVAL; + + return 0; +} + static const struct net_device_ops mlxsw_sx_port_netdev_ops = { .ndo_open = mlxsw_sx_port_open, .ndo_stop = mlxsw_sx_port_stop, .ndo_start_xmit = mlxsw_sx_port_xmit, .ndo_change_mtu = mlxsw_sx_port_change_mtu, .ndo_get_stats64 = mlxsw_sx_port_get_stats64, + .ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name, }; static void mlxsw_sx_port_get_drvinfo(struct net_device *dev, @@ -410,7 +455,7 @@ static void mlxsw_sx_port_get_drvinfo(struct net_device *dev, struct mlxsw_sx_port_hw_stats { char str[ETH_GSTRING_LEN]; - u64 (*getter)(char *payload); + u64 (*getter)(const char *payload); }; static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = { @@ -642,6 +687,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { }; #define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode) +#define MLXSW_SX_PORT_BASE_SPEED 10000 /* Mb/s */ static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto) { @@ -741,14 +787,14 @@ static int mlxsw_sx_port_get_settings(struct net_device *dev, u32 eth_proto_oper; int err; - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); if (err) { netdev_err(dev, "Failed to get proto"); return err; } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, - ð_proto_admin, ð_proto_oper); + mlxsw_reg_ptys_eth_unpack(ptys_pl, ð_proto_cap, + ð_proto_admin, ð_proto_oper); cmd->supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) | mlxsw_sx_from_ptys_supported_link(eth_proto_cap) | @@ -789,6 +835,18 @@ static u32 mlxsw_sx_to_ptys_speed(u32 speed) return ptys_proto; } +static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sx_port_link_mode[i].speed <= upper_speed) + ptys_proto |= mlxsw_sx_port_link_mode[i].mask; + } + return ptys_proto; +} + static int mlxsw_sx_port_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -808,13 +866,14 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev, mlxsw_sx_to_ptys_advert_link(cmd->advertising) : mlxsw_sx_to_ptys_speed(speed); - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); if (err) { netdev_err(dev, "Failed to get proto"); return err; } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, NULL); + mlxsw_reg_ptys_eth_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, + NULL); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { @@ -824,7 +883,8 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev, if (eth_proto_new == eth_proto_admin) return 0; - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, eth_proto_new); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, + eth_proto_new); err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); if (err) { netdev_err(dev, "Failed to set proto admin"); @@ -888,7 +948,7 @@ static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = { static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx) { - char spad_pl[MLXSW_REG_SPAD_LEN]; + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; int err; err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl); @@ -935,13 +995,28 @@ static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port, return err; } -static int mlxsw_sx_port_speed_set(struct mlxsw_sx_port *mlxsw_sx_port, - u32 speed) +static int mlxsw_sx_port_ib_speed_set(struct mlxsw_sx_port *mlxsw_sx_port, + u16 speed, u16 width) { struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; char ptys_pl[MLXSW_REG_PTYS_LEN]; - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, speed); + mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sx_port->local_port, speed, + width); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); +} + +static int +mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + u32 upper_speed = MLXSW_SX_PORT_BASE_SPEED * width; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_admin; + + eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed); + mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, + eth_proto_admin); return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); } @@ -956,20 +1031,22 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port, return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl); } -static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) +static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, + u8 module, u8 width) { struct mlxsw_sx_port *mlxsw_sx_port; struct net_device *dev; - bool usable; int err; dev = alloc_etherdev(sizeof(struct mlxsw_sx_port)); if (!dev) return -ENOMEM; + SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev); mlxsw_sx_port = netdev_priv(dev); mlxsw_sx_port->dev = dev; mlxsw_sx_port->mlxsw_sx = mlxsw_sx; mlxsw_sx_port->local_port = local_port; + mlxsw_sx_port->mapping.module = module; mlxsw_sx_port->pcpu_stats = netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats); @@ -994,24 +1071,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | NETIF_F_VLAN_CHALLENGED; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; + /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ dev->needed_headroom = MLXSW_TXHDR_LEN; - err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable); - if (err) { - dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to check module\n", - mlxsw_sx_port->local_port); - goto err_port_module_check; - } - - if (!usable) { - dev_dbg(mlxsw_sx->bus_info->dev, "Port %d: Not usable, skipping initialization\n", - mlxsw_sx_port->local_port); - goto port_not_usable; - } - err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1026,15 +1093,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto err_port_swid_set; } - err = mlxsw_sx_port_speed_set(mlxsw_sx_port, - MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4); + err = mlxsw_sx_port_speed_by_width_set(mlxsw_sx_port, width); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n", mlxsw_sx_port->local_port); goto err_port_speed_set; } - err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, ETH_DATA_LEN); + err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, ETH_DATA_LEN); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n", mlxsw_sx_port->local_port); @@ -1069,19 +1135,11 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto err_register_netdev; } - err = mlxsw_core_port_init(mlxsw_sx->core, &mlxsw_sx_port->core_port, - mlxsw_sx_port->local_port, dev, false, 0); - if (err) { - dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n", - mlxsw_sx_port->local_port); - goto err_core_port_init; - } - + mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port, + mlxsw_sx_port, dev, false, 0); mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; -err_core_port_init: - unregister_netdev(dev); err_register_netdev: err_port_mac_learning_mode_set: err_port_stp_state_set: @@ -1091,8 +1149,6 @@ err_port_speed_set: mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: -port_not_usable: -err_port_module_check: err_dev_addr_get: free_percpu(mlxsw_sx_port->pcpu_stats); err_alloc_stats: @@ -1100,31 +1156,168 @@ err_alloc_stats: return err; } -static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) +static int mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, + u8 module, u8 width) +{ + int err; + + err = mlxsw_core_port_init(mlxsw_sx->core, local_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, width); + if (err) + goto err_port_create; + + return 0; + +err_port_create: + mlxsw_core_port_fini(mlxsw_sx->core, local_port); + return err; +} + +static void __mlxsw_sx_port_eth_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) { struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; - if (!mlxsw_sx_port) - return; - mlxsw_core_port_fini(&mlxsw_sx_port->core_port); + mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx); unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ + mlxsw_sx->ports[local_port] = NULL; mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); free_percpu(mlxsw_sx_port->pcpu_stats); free_netdev(mlxsw_sx_port->dev); } +static bool mlxsw_sx_port_created(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + return mlxsw_sx->ports[local_port] != NULL; +} + +static int __mlxsw_sx_port_ib_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, + u8 module, u8 width) +{ + struct mlxsw_sx_port *mlxsw_sx_port; + int err; + + mlxsw_sx_port = kzalloc(sizeof(*mlxsw_sx_port), GFP_KERNEL); + if (!mlxsw_sx_port) + return -ENOMEM; + mlxsw_sx_port->mlxsw_sx = mlxsw_sx; + mlxsw_sx_port->local_port = local_port; + mlxsw_sx_port->mapping.module = module; + + err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", + mlxsw_sx_port->local_port); + goto err_port_system_port_mapping_set; + } + + /* Adding port to Infiniband swid (1) */ + err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 1); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sx_port->local_port); + goto err_port_swid_set; + } + + /* Expose the IB port number as it's front panel name */ + err = mlxsw_sx_port_ib_port_set(mlxsw_sx_port, module + 1); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set IB port\n", + mlxsw_sx_port->local_port); + goto err_port_ib_set; + } + + /* Supports all speeds from SDR to FDR (bitmask) and support bus width + * of 1x, 2x and 4x (3 bits bitmask) + */ + err = mlxsw_sx_port_ib_speed_set(mlxsw_sx_port, + MLXSW_REG_PTYS_IB_SPEED_EDR - 1, + BIT(3) - 1); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n", + mlxsw_sx_port->local_port); + goto err_port_speed_set; + } + + /* Change to the maximum MTU the device supports, the SMA will take + * care of the active MTU + */ + err = mlxsw_sx_port_mtu_ib_set(mlxsw_sx_port, MLXSW_IB_DEFAULT_MTU); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n", + mlxsw_sx_port->local_port); + goto err_port_mtu_set; + } + + err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to change admin state to UP\n", + mlxsw_sx_port->local_port); + goto err_port_admin_set; + } + + mlxsw_core_port_ib_set(mlxsw_sx->core, mlxsw_sx_port->local_port, + mlxsw_sx_port); + mlxsw_sx->ports[local_port] = mlxsw_sx_port; + return 0; + +err_port_admin_set: +err_port_mtu_set: +err_port_speed_set: +err_port_ib_set: + mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); +err_port_swid_set: +err_port_system_port_mapping_set: + kfree(mlxsw_sx_port); + return err; +} + +static void __mlxsw_sx_port_ib_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; + + mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx); + mlxsw_sx->ports[local_port] = NULL; + mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false); + mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); + kfree(mlxsw_sx_port); +} + +static void __mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + enum devlink_port_type port_type = + mlxsw_core_port_type_get(mlxsw_sx->core, local_port); + + if (port_type == DEVLINK_PORT_TYPE_ETH) + __mlxsw_sx_port_eth_remove(mlxsw_sx, local_port); + else if (port_type == DEVLINK_PORT_TYPE_IB) + __mlxsw_sx_port_ib_remove(mlxsw_sx, local_port); +} + +static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + __mlxsw_sx_port_remove(mlxsw_sx, local_port); + mlxsw_core_port_fini(mlxsw_sx->core, local_port); +} + static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) { int i; for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) - mlxsw_sx_port_remove(mlxsw_sx, i); + if (mlxsw_sx_port_created(mlxsw_sx, i)) + mlxsw_sx_port_remove(mlxsw_sx, i); kfree(mlxsw_sx->ports); } static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) { size_t alloc_size; + u8 module, width; int i; int err; @@ -1134,25 +1327,57 @@ static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) return -ENOMEM; for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { - err = mlxsw_sx_port_create(mlxsw_sx, i); + err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module, + &width); + if (err) + goto err_port_module_info_get; + if (!width) + continue; + err = mlxsw_sx_port_eth_create(mlxsw_sx, i, module, width); if (err) goto err_port_create; } return 0; err_port_create: +err_port_module_info_get: for (i--; i >= 1; i--) - mlxsw_sx_port_remove(mlxsw_sx, i); + if (mlxsw_sx_port_created(mlxsw_sx, i)) + mlxsw_sx_port_remove(mlxsw_sx, i); kfree(mlxsw_sx->ports); return err; } +static void mlxsw_sx_pude_eth_event_func(struct mlxsw_sx_port *mlxsw_sx_port, + enum mlxsw_reg_pude_oper_status status) +{ + if (status == MLXSW_PORT_OPER_STATUS_UP) { + netdev_info(mlxsw_sx_port->dev, "link up\n"); + netif_carrier_on(mlxsw_sx_port->dev); + } else { + netdev_info(mlxsw_sx_port->dev, "link down\n"); + netif_carrier_off(mlxsw_sx_port->dev); + } +} + +static void mlxsw_sx_pude_ib_event_func(struct mlxsw_sx_port *mlxsw_sx_port, + enum mlxsw_reg_pude_oper_status status) +{ + if (status == MLXSW_PORT_OPER_STATUS_UP) + pr_info("ib link for port %d - up\n", + mlxsw_sx_port->mapping.module + 1); + else + pr_info("ib link for port %d - down\n", + mlxsw_sx_port->mapping.module + 1); +} + static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg, char *pude_pl, void *priv) { struct mlxsw_sx *mlxsw_sx = priv; struct mlxsw_sx_port *mlxsw_sx_port; enum mlxsw_reg_pude_oper_status status; + enum devlink_port_type port_type; u8 local_port; local_port = mlxsw_reg_pude_local_port_get(pude_pl); @@ -1164,13 +1389,11 @@ static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg, } status = mlxsw_reg_pude_oper_status_get(pude_pl); - if (status == MLXSW_PORT_OPER_STATUS_UP) { - netdev_info(mlxsw_sx_port->dev, "link up\n"); - netif_carrier_on(mlxsw_sx_port->dev); - } else { - netdev_info(mlxsw_sx_port->dev, "link down\n"); - netif_carrier_off(mlxsw_sx_port->dev); - } + port_type = mlxsw_core_port_type_get(mlxsw_sx->core, local_port); + if (port_type == DEVLINK_PORT_TYPE_ETH) + mlxsw_sx_pude_eth_event_func(mlxsw_sx_port, status); + else if (port_type == DEVLINK_PORT_TYPE_IB) + mlxsw_sx_pude_ib_event_func(mlxsw_sx_port, status); } static struct mlxsw_event_listener mlxsw_sx_pude_event = { @@ -1244,6 +1467,33 @@ static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port, netif_receive_skb(skb); } +static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port, + enum devlink_port_type new_type) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); + u8 module, width; + int err; + + if (new_type == DEVLINK_PORT_TYPE_AUTO) + return -EOPNOTSUPP; + + __mlxsw_sx_port_remove(mlxsw_sx, local_port); + err = mlxsw_sx_port_module_info_get(mlxsw_sx, local_port, &module, + &width); + if (err) + goto err_port_module_info_get; + + if (new_type == DEVLINK_PORT_TYPE_ETH) + err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, + width); + else if (new_type == DEVLINK_PORT_TYPE_IB) + err = __mlxsw_sx_port_ib_create(mlxsw_sx, local_port, module, + width); + +err_port_module_info_get: + return err; +} + static const struct mlxsw_rx_listener mlxsw_sx_rx_listener[] = { { .func = mlxsw_sx_rx_listener_func, @@ -1529,36 +1779,65 @@ static struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_flood_mode = 1, .flood_mode = 3, .used_max_ib_mc = 1, - .max_ib_mc = 0, + .max_ib_mc = 6, .used_max_pkey = 1, .max_pkey = 0, .swid_config = { { .used_type = 1, .type = MLXSW_PORT_SWID_TYPE_ETH, + }, + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_IB, } }, .resource_query_enable = 0, }; static struct mlxsw_driver mlxsw_sx_driver = { - .kind = MLXSW_DEVICE_KIND_SWITCHX2, - .owner = THIS_MODULE, + .kind = mlxsw_sx_driver_name, .priv_size = sizeof(struct mlxsw_sx), .init = mlxsw_sx_init, .fini = mlxsw_sx_fini, .txhdr_construct = mlxsw_sx_txhdr_construct, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sx_config_profile, + .port_type_set = mlxsw_sx_port_type_set, +}; + +static const struct pci_device_id mlxsw_sx_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sx_pci_driver = { + .name = mlxsw_sx_driver_name, + .id_table = mlxsw_sx_pci_id_table, }; static int __init mlxsw_sx_module_init(void) { - return mlxsw_core_driver_register(&mlxsw_sx_driver); + int err; + + err = mlxsw_core_driver_register(&mlxsw_sx_driver); + if (err) + return err; + + err = mlxsw_pci_driver_register(&mlxsw_sx_pci_driver); + if (err) + goto err_pci_driver_register; + + return 0; + +err_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sx_driver); + return err; } static void __exit mlxsw_sx_module_exit(void) { + mlxsw_pci_driver_unregister(&mlxsw_sx_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sx_driver); } @@ -1568,4 +1847,4 @@ module_exit(mlxsw_sx_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox SwitchX-2 driver"); -MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SWITCHX2); +MODULE_DEVICE_TABLE(pci, mlxsw_sx_pci_id_table); diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 1edc973df4c4..e7e1aff40bd9 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1063,7 +1063,6 @@ static const struct net_device_ops ks8851_netdev_ops = { .ndo_start_xmit = ks8851_start_xmit, .ndo_set_mac_address = ks8851_set_mac_address, .ndo_set_rx_mode = ks8851_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 2fc5cd56c0a8..db628078a4e6 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -1285,7 +1285,6 @@ static const struct net_device_ops ks_netdev_ops = { .ndo_start_xmit = ks_start_xmit, .ndo_set_mac_address = ks_set_mac_address, .ndo_set_rx_mode = ks_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 280e761d3a97..97f6ef1fa7d0 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -5807,24 +5807,19 @@ static int netdev_change_mtu(struct net_device *dev, int new_mtu) if (hw->dev_count > 1) if (dev != hw_priv->dev) return 0; - if (new_mtu < 60) - return -EINVAL; - if (dev->mtu != new_mtu) { - hw_mtu = new_mtu + ETHERNET_HEADER_SIZE + 4; - if (hw_mtu > MAX_RX_BUF_SIZE) - return -EINVAL; - if (hw_mtu > REGULAR_RX_BUF_SIZE) { - hw->features |= RX_HUGE_FRAME; - hw_mtu = MAX_RX_BUF_SIZE; - } else { - hw->features &= ~RX_HUGE_FRAME; - hw_mtu = REGULAR_RX_BUF_SIZE; - } - hw_mtu = (hw_mtu + 3) & ~3; - hw_priv->mtu = hw_mtu; - dev->mtu = new_mtu; + hw_mtu = new_mtu + ETHERNET_HEADER_SIZE + 4; + if (hw_mtu > REGULAR_RX_BUF_SIZE) { + hw->features |= RX_HUGE_FRAME; + hw_mtu = MAX_RX_BUF_SIZE; + } else { + hw->features &= ~RX_HUGE_FRAME; + hw_mtu = REGULAR_RX_BUF_SIZE; } + hw_mtu = (hw_mtu + 3) & ~3; + hw_priv->mtu = hw_mtu; + dev->mtu = new_mtu; + return 0; } @@ -7099,6 +7094,12 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) dev->netdev_ops = &netdev_ops; dev->ethtool_ops = &netdev_ethtool_ops; + + /* MTU range: 60 - 1894 */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = MAX_RX_BUF_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + if (register_netdev(dev)) goto pcidev_init_reg_err; port_set_power_saving(port, true); diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 0a26b11ca8f6..045b9106c0ff 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1544,7 +1544,6 @@ static const struct net_device_ops enc28j60_netdev_ops = { .ndo_set_rx_mode = enc28j60_set_multicast_list, .ndo_set_mac_address = enc28j60_set_mac_address, .ndo_tx_timeout = enc28j60_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 4367dd6879a2..9774b50cff6e 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -444,7 +444,6 @@ static struct net_device_ops moxart_netdev_ops = { .ndo_set_rx_mode = moxart_mac_set_rx_mode, .ndo_set_mac_address = moxart_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int moxart_mac_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 6d1a956e3f77..e506ca876d0d 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -289,7 +289,7 @@ static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] = {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL }; module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL, 0444); -MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image names per board"); +MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board"); static int myri10ge_ecrc_enable = 1; module_param(myri10ge_ecrc_enable, int, S_IRUGO); @@ -3232,10 +3232,6 @@ static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) struct myri10ge_priv *mgp = netdev_priv(dev); int error = 0; - if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) { - netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu); - return -EINVAL; - } netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); if (mgp->running) { /* if we change the mtu on an active device, we must @@ -4086,13 +4082,19 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) myri10ge_setup_dca(mgp); #endif pci_set_drvdata(pdev, mgp); - if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU) - myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; - if ((myri10ge_initial_mtu + ETH_HLEN) < 68) - myri10ge_initial_mtu = 68; - netdev->netdev_ops = &myri10ge_netdev_ops; + /* MTU range: 68 - 9000 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; + + if (myri10ge_initial_mtu > netdev->max_mtu) + myri10ge_initial_mtu = netdev->max_mtu; + if (myri10ge_initial_mtu < netdev->min_mtu) + myri10ge_initial_mtu = netdev->min_mtu; + netdev->mtu = myri10ge_initial_mtu; + + netdev->netdev_ops = &myri10ge_netdev_ops; netdev->hw_features = mgp->features | NETIF_F_RXCSUM; /* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */ diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index acf3f11e38cc..a6caeb567c0d 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -110,7 +110,6 @@ static const struct net_device_ops sonic_netdev_ops = { .ndo_get_stats = sonic_get_stats, .ndo_set_rx_mode = sonic_multicast_list, .ndo_tx_timeout = sonic_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index d98f5b8a1c66..3ca6ae7caf55 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -190,7 +190,6 @@ static const struct net_device_ops macsonic_netdev_ops = { .ndo_tx_timeout = sonic_tx_timeout, .ndo_get_stats = sonic_get_stats, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index ed89029ff75b..22b0821c1da0 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -929,6 +929,10 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = ðtool_ops; + /* MTU range: 64 - 2024 */ + dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN; + dev->max_mtu = NATSEMI_RX_LIMIT - NATSEMI_HEADERS; + if (mtu) dev->mtu = mtu; @@ -2526,9 +2530,6 @@ static void __set_rx_mode(struct net_device *dev) static int natsemi_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 64 || new_mtu > NATSEMI_RX_LIMIT-NATSEMI_HEADERS) - return -EINVAL; - dev->mtu = new_mtu; /* synchronized against open : rtnl_lock() held by caller */ diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 569ade6cf85c..93c4bdc0cdca 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -919,7 +919,7 @@ netdev_mangle_me_harder_failed: ndev->stats.rx_dropped++; } } else { - kfree_skb(skb); + dev_kfree_skb_irq(skb); } nr++; @@ -1679,14 +1679,6 @@ static void ns83820_getmac(struct ns83820 *dev, u8 *mac) } } -static int ns83820_change_mtu(struct net_device *ndev, int new_mtu) -{ - if (new_mtu > RX_BUF_SIZE) - return -EINVAL; - ndev->mtu = new_mtu; - return 0; -} - static void ns83820_set_multicast(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); @@ -1933,7 +1925,6 @@ static const struct net_device_ops netdev_ops = { .ndo_stop = ns83820_stop, .ndo_start_xmit = ns83820_hard_start_xmit, .ndo_get_stats = ns83820_get_stats, - .ndo_change_mtu = ns83820_change_mtu, .ndo_set_rx_mode = ns83820_set_multicast, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, @@ -2190,6 +2181,8 @@ static int ns83820_init_one(struct pci_dev *pci_dev, ndev->features |= NETIF_F_SG; ndev->features |= NETIF_F_IP_CSUM; + ndev->min_mtu = 0; + #ifdef NS83820_VLAN_ACCEL_SUPPORT /* We also support hardware vlan acceleration */ ndev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 7007d212f3e4..9ee0f69a83c0 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -124,7 +124,6 @@ static const struct net_device_ops xtsonic_netdev_ops = { .ndo_set_rx_mode = sonic_multicast_list, .ndo_tx_timeout = sonic_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index eaa37c079a7c..564f682fa4dc 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -6678,11 +6678,6 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) struct s2io_nic *sp = netdev_priv(dev); int ret = 0; - if ((new_mtu < MIN_MTU) || (new_mtu > S2IO_JUMBO_SIZE)) { - DBG_PRINT(ERR_DBG, "%s: MTU size is invalid.\n", dev->name); - return -EPERM; - } - dev->mtu = new_mtu; if (netif_running(dev)) { s2io_stop_all_tx_queue(sp); @@ -8019,6 +8014,10 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) config->mc_start_offset = S2IO_HERC_MC_ADDR_START_OFFSET; } + /* MTU range: 46 - 9600 */ + dev->min_mtu = MIN_MTU; + dev->max_mtu = S2IO_JUMBO_SIZE; + /* store mac addresses from CAM to s2io_nic structure */ do_s2io_store_unicast_mc(sp); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index 6ce4412fcc1a..cfa970417f81 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -27,7 +27,7 @@ (((size) - (((u64)adrs) & ((size)-1))) & ((size)-1)) #endif -#define VXGE_HW_MIN_MTU 68 +#define VXGE_HW_MIN_MTU ETH_MIN_MTU #define VXGE_HW_MAX_MTU 9600 #define VXGE_HW_DEFAULT_MTU 1500 diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index e0993eba5df3..e07b936f64ec 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3074,11 +3074,6 @@ static int vxge_change_mtu(struct net_device *dev, int new_mtu) vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__); - if ((new_mtu < VXGE_HW_MIN_MTU) || (new_mtu > VXGE_HW_MAX_MTU)) { - vxge_debug_init(vdev->level_err, - "%s: mtu size is invalid", dev->name); - return -EPERM; - } /* check if device is down already */ if (unlikely(!is_vxge_card_up(vdev))) { @@ -3462,6 +3457,10 @@ static int vxge_device_register(struct __vxge_hw_device *hldev, "%s : using High DMA", __func__); } + /* MTU range: 68 - 9600 */ + ndev->min_mtu = VXGE_HW_MIN_MTU; + ndev->max_mtu = VXGE_HW_MAX_MTU; + ret = register_netdev(ndev); if (ret) { vxge_debug_init(vxge_hw_device_trace_level_get(hldev), diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 87aa8a3e9112..76a19f1796af 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -62,6 +62,7 @@ enum nfp_bpf_action_type { NN_ACT_TC_DROP, NN_ACT_TC_REDIR, NN_ACT_DIRECT, + NN_ACT_XDP, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index f8df5300f49c..335beb8b8b45 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1126,7 +1126,7 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.src_reg * 2, true, 4); } -static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, len)) emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), @@ -1134,12 +1134,42 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) else return -ENOTSUPP; - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + return 0; +} + +static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 dst = reg_both(meta->insn.dst_reg * 2); + + if (meta->insn.off != offsetof(struct xdp_md, data) && + meta->insn.off != offsetof(struct xdp_md, data_end)) + return -ENOTSUPP; + + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); + + if (meta->insn.off == offsetof(struct xdp_md, data)) + return 0; + + emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN); return 0; } -static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + int ret; + + if (nfp_prog->act == NN_ACT_XDP) + ret = mem_ldx4_xdp(nfp_prog, meta); + else + ret = mem_ldx4_skb(nfp_prog, meta); + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return ret; +} + +static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, mark)) return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); @@ -1147,6 +1177,18 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return -ENOTSUPP; } +static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return -ENOTSUPP; +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_prog->act == NN_ACT_XDP) + return mem_stx4_xdp(nfp_prog, meta); + return mem_stx4_skb(nfp_prog, meta); +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ @@ -1530,6 +1572,47 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } +static void nfp_outro_xdp(struct nfp_prog *nfp_prog) +{ + /* XDP return codes: + * 0 aborted 0x82 -> drop, count as stat3 + * 1 drop 0x22 -> drop, count as stat1 + * 2 pass 0x11 -> pass, count as stat0 + * 3 tx 0x44 -> redir, count as stat2 + * * unknown 0x82 -> drop, count as stat3 + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 3 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + + wrp_immed(nfp_prog, reg_b(2), 0x44112282); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { @@ -1540,6 +1623,9 @@ static void nfp_outro(struct nfp_prog *nfp_prog) case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); break; + case NN_ACT_XDP: + nfp_outro_xdp(nfp_prog); + break; } } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c index 144cae87f63a..b3361f9b8e5c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -80,6 +80,9 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, { const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + if (nfp_prog->act == NN_ACT_XDP) + return 0; + if (reg0->type != CONST_IMM) { pr_info("unsupported exit state: %d, imm: %llx\n", reg0->type, reg0->imm); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index ed824e11a1e3..2115f446031e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -75,7 +75,6 @@ /* Default size for MTU and freelist buffer sizes */ #define NFP_NET_DEFAULT_MTU 1500 -#define NFP_NET_DEFAULT_RX_BUFSZ 2048 /* Maximum number of bytes prepended to a packet */ #define NFP_NET_MAX_PREPEND 64 @@ -88,6 +87,9 @@ /* Queue/Ring definitions */ #define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ #define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ +#define NFP_NET_MAX_R_VECS (NFP_NET_MAX_TX_RINGS > NFP_NET_MAX_RX_RINGS ? \ + NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS) +#define NFP_NET_MAX_IRQS (NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS) #define NFP_NET_MIN_TX_DESCS 256 /* Min. # of Tx descs per ring */ #define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ @@ -102,6 +104,10 @@ /* Offload definitions */ #define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) +#define NFP_NET_RX_BUF_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define NFP_NET_RX_BUF_NON_DATA (NFP_NET_RX_BUF_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* Forward declarations */ struct nfp_net; struct nfp_net_r_vector; @@ -165,7 +171,10 @@ struct nfp_net_tx_desc { * on the head's buffer). Equal to skb->len for non-TSO packets. */ struct nfp_net_tx_buf { - struct sk_buff *skb; + union { + struct sk_buff *skb; + void *frag; + }; dma_addr_t dma_addr; short int fidx; u16 pkt_cnt; @@ -278,11 +287,11 @@ struct nfp_net_rx_hash { /** * struct nfp_net_rx_buf - software RX buffer descriptor - * @skb: sk_buff associated with this buffer + * @frag: page fragment buffer * @dma_addr: DMA mapping address of the buffer */ struct nfp_net_rx_buf { - struct sk_buff *skb; + void *frag; dma_addr_t dma_addr; }; @@ -335,6 +344,7 @@ struct nfp_net_rx_ring { * @napi: NAPI structure for this ring vec * @tx_ring: Pointer to TX ring * @rx_ring: Pointer to RX ring + * @xdp_ring: Pointer to an extra TX ring for XDP * @irq_idx: Index into MSI-X table * @rx_sync: Seqlock for atomic updates of RX stats * @rx_pkts: Number of received packets @@ -378,6 +388,8 @@ struct nfp_net_r_vector { u64 hw_csum_rx_inner_ok; u64 hw_csum_rx_error; + struct nfp_net_tx_ring *xdp_ring; + struct u64_stats_sync tx_sync; u64 tx_pkts; u64 tx_bytes; @@ -421,12 +433,13 @@ struct nfp_stat_pair { * @netdev: Backpointer to net_device structure * @nfp_fallback: Is the driver used in fallback mode? * @is_vf: Is the driver attached to a VF? - * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf + * @bpf_offload_xdp: Offloaded BPF program is XDP * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts + * @xdp_prog: Installed XDP program * @cpp: Pointer to the CPP handle * @nfp_dev_cpp: Pointer to the NFP Device handle * @ctrl_area: Pointer to the CPP area for the control BAR @@ -446,12 +459,13 @@ struct nfp_stat_pair { * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings + * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_rx_rings: Currently configured number of RX rings * @txd_cnt: Size of the TX ring in number of descriptors * @rxd_cnt: Size of the RX ring in number of descriptors * @tx_rings: Array of pre-allocated TX ring structures * @rx_rings: Array of pre-allocated RX ring structures - * @num_irqs: Number of allocated interrupt vectors + * @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @num_r_vecs: Number of used ring vectors * @r_vecs: Pre-allocated array of ring vectors * @irq_entries: Pre-allocated array of MSI-X entries @@ -487,15 +501,17 @@ struct nfp_net { unsigned nfp_fallback:1; unsigned is_vf:1; - unsigned is_nfp3200:1; unsigned fw_loaded:1; unsigned bpf_offload_skip_sw:1; + unsigned bpf_offload_xdp:1; u32 ctrl; u32 fl_bufsz; u32 rx_offset; + struct bpf_prog *xdp_prog; + struct nfp_net_tx_ring *tx_rings; struct nfp_net_rx_ring *rx_rings; @@ -524,11 +540,12 @@ struct nfp_net { struct timer_list rx_filter_stats_timer; spinlock_t rx_filter_lock; - int max_tx_rings; - int max_rx_rings; + unsigned int max_tx_rings; + unsigned int max_rx_rings; - int num_tx_rings; - int num_rx_rings; + unsigned int num_tx_rings; + unsigned int num_stack_tx_rings; + unsigned int num_rx_rings; int stride_tx; int stride_rx; @@ -536,11 +553,10 @@ struct nfp_net { int txd_cnt; int rxd_cnt; - u8 num_irqs; - u8 num_r_vecs; - struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS]; - struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + - NFP_NET_MAX_TX_RINGS]; + unsigned int max_r_vecs; + unsigned int num_r_vecs; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS]; + struct msix_entry irq_entries[NFP_NET_MAX_IRQS]; irq_handler_t lsc_handler; char lsc_name[IFNAMSIZ + 8]; @@ -580,6 +596,13 @@ struct nfp_net { struct dentry *debugfs_dir; }; +struct nfp_net_ring_set { + unsigned int n_rings; + unsigned int mtu; + unsigned int dcnt; + void *rings; +}; + /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ @@ -593,16 +616,13 @@ static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well */ static inline u16 nn_readw(struct nfp_net *nn, int off) { - WARN_ON_ONCE(nn->is_nfp3200); return readw(nn->ctrl_bar + off); } static inline void nn_writew(struct nfp_net *nn, int off, u16 val) { - WARN_ON_ONCE(nn->is_nfp3200); writew(val, nn->ctrl_bar + off); } @@ -650,7 +670,7 @@ static inline void nn_pci_flush(struct nfp_net *nn) #define NFP_QCP_QUEUE_STS_HI 0x000c #define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff -/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */ +/* The offset of a QCP queues in the PCIe Target */ #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) /* nfp_qcp_ptr - Read or Write Pointer of a queue */ @@ -757,8 +777,9 @@ extern const char nfp_net_driver_version[]; void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, void __iomem *ctrl_bar); -struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, - int max_tx_rings, int max_rx_rings); +struct nfp_net * +nfp_net_netdev_alloc(struct pci_dev *pdev, + unsigned int max_tx_rings, unsigned int max_rx_rings); void nfp_net_netdev_free(struct nfp_net *nn); int nfp_net_netdev_init(struct net_device *netdev); void nfp_net_netdev_clean(struct net_device *netdev); @@ -770,7 +791,9 @@ void nfp_net_rss_write_key(struct nfp_net *nn); void nfp_net_coalesce_write_cfg(struct nfp_net *nn); int nfp_net_irqs_alloc(struct nfp_net *nn); void nfp_net_irqs_disable(struct nfp_net *nn); -int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt); +int +nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx); #ifdef CONFIG_NFP_NET_DEBUG void nfp_net_debugfs_create(void); @@ -796,8 +819,6 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) #endif /* CONFIG_NFP_NET_DEBUG */ void nfp_net_filter_stats_timer(unsigned long data); -int -nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, - struct tc_cls_bpf_offload *cls_bpf); +int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index aee3fd2b6538..99edb9fd84bf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -41,6 +41,7 @@ * Chris Telfer <chris.telfer@netronome.com> */ +#include <linux/bpf.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -50,6 +51,7 @@ #include <linux/interrupt.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/page_ref.h> #include <linux/pci.h> #include <linux/pci_regs.h> #include <linux/msi.h> @@ -80,6 +82,22 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, put_unaligned_le32(reg, fw_ver); } +static dma_addr_t +nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz, + int direction) +{ + return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM, + bufsz - NFP_NET_RX_BUF_NON_DATA, direction); +} + +static void +nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr, + unsigned int bufsz, int direction) +{ + dma_unmap_single(&nn->pdev->dev, dma_addr, + bufsz - NFP_NET_RX_BUF_NON_DATA, direction); +} + /* Firmware reconfig * * Firmware reconfig may take a while so we have two versions of it - @@ -249,43 +267,14 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update) */ /** - * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking - * @nn: NFP Network structure - * @entry_nr: MSI-X table entry - * - * Clear the MSI-X table mask bit for the given entry bypassing Linux irq - * handling subsystem. Use *only* to reenable automasked vectors. - */ -static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) -{ - struct list_head *msi_head = &nn->pdev->dev.msi_list; - struct msi_desc *entry; - u32 off; - - /* All MSI-Xs have the same mask_base */ - entry = list_first_entry(msi_head, struct msi_desc, list); - - off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + - PCI_MSIX_ENTRY_VECTOR_CTRL; - writel(0, entry->mask_base + off); - readl(entry->mask_base); -} - -/** * nfp_net_irq_unmask() - Unmask automasked interrupt * @nn: NFP Network structure * @entry_nr: MSI-X table entry * - * If MSI-X auto-masking is enabled clear the mask bit, otherwise - * clear the ICR for the entry. + * Clear the ICR for the IRQ entry. */ static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) { - if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { - nfp_net_irq_unmask_msix(nn, entry_nr); - return; - } - nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); nn_pci_flush(nn); } @@ -320,28 +309,6 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) } /** - * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want - * @nn: NFP Network structure - * - * We want a vector per CPU (or ring), whatever is smaller plus - * NFP_NET_NON_Q_VECTORS for LSC etc. - * - * Return: Number of interrupts wanted - */ -static int nfp_net_irqs_wanted(struct nfp_net *nn) -{ - int ncpus; - int vecs; - - ncpus = num_online_cpus(); - - vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); - vecs = min_t(int, vecs, ncpus); - - return vecs + NFP_NET_NON_Q_VECTORS; -} - -/** * nfp_net_irqs_alloc() - allocates MSI-X irqs * @nn: NFP Network structure * @@ -350,22 +317,24 @@ static int nfp_net_irqs_wanted(struct nfp_net *nn) int nfp_net_irqs_alloc(struct nfp_net *nn) { int wanted_irqs; + unsigned int n; - wanted_irqs = nfp_net_irqs_wanted(nn); + wanted_irqs = nn->num_r_vecs + NFP_NET_NON_Q_VECTORS; - nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); - if (nn->num_irqs == 0) { + n = nfp_net_msix_alloc(nn, wanted_irqs); + if (n == 0) { nn_err(nn, "Failed to allocate MSI-X IRQs\n"); return 0; } - nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; + nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS; + nn->num_r_vecs = nn->max_r_vecs; - if (nn->num_irqs < wanted_irqs) + if (n < wanted_irqs) nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", - wanted_irqs, nn->num_irqs); + wanted_irqs, n); - return nn->num_irqs; + return n; } /** @@ -515,18 +484,19 @@ static void nfp_net_irqs_assign(struct net_device *netdev) struct nfp_net_r_vector *r_vec; int r; - /* Assumes nn->num_tx_rings == nn->num_rx_rings */ - if (nn->num_tx_rings > nn->num_r_vecs) { - nn_warn(nn, "More rings (%d) than vectors (%d).\n", - nn->num_tx_rings, nn->num_r_vecs); - nn->num_tx_rings = nn->num_r_vecs; - nn->num_rx_rings = nn->num_r_vecs; - } + if (nn->num_rx_rings > nn->num_r_vecs || + nn->num_tx_rings > nn->num_r_vecs) + nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n", + nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs); + + nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings); + nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings); + nn->num_stack_tx_rings = nn->num_tx_rings; nn->lsc_handler = nfp_net_irq_lsc; nn->exn_handler = nfp_net_irq_exn; - for (r = 0; r < nn->num_r_vecs; r++) { + for (r = 0; r < nn->max_r_vecs; r++) { r_vec = &nn->r_vecs[r]; r_vec->nfp_net = nn; r_vec->handler = nfp_net_irq_rxtx; @@ -605,7 +575,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, * * Return: True if the ring is full. */ -static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) { return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); } @@ -745,6 +715,13 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, u64_stats_update_end(&r_vec->tx_sync); } +static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +{ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; +} + /** * nfp_net_tx() - Main transmit entry point * @skb: SKB to transmit @@ -790,7 +767,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_free; - wr_idx = tx_ring->wr_p % tx_ring->cnt; + wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1); /* Stash the soft descriptor of the head then initialize it */ txbuf = &tx_ring->txbufs[wr_idx]; @@ -834,7 +811,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_unmap; - wr_idx = (wr_idx + 1) % tx_ring->cnt; + wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1); tx_ring->txbufs[wr_idx].skb = skb; tx_ring->txbufs[wr_idx].dma_addr = dma_addr; tx_ring->txbufs[wr_idx].fidx = f; @@ -859,12 +836,8 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) nfp_net_tx_ring_stop(nd_q, tx_ring); tx_ring->wr_ptr_add += nr_frags + 1; - if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { - /* force memory write before we let HW know */ - wmb(); - nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); - tx_ring->wr_ptr_add = 0; - } + if (!skb->xmit_more || netif_xmit_stopped(nd_q)) + nfp_net_tx_xmit_more_flush(tx_ring); skb_tx_timestamp(skb); @@ -929,7 +902,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; while (todo--) { - idx = tx_ring->rd_p % tx_ring->cnt; + idx = tx_ring->rd_p & (tx_ring->cnt - 1); tx_ring->rd_p++; skb = tx_ring->txbufs[idx].skb; @@ -986,6 +959,56 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); } +static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + u32 done_pkts = 0, done_bytes = 0; + int idx, todo; + u32 qcp_rd_p; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + if (qcp_rd_p > tx_ring->qcp_rd_p) + todo = qcp_rd_p - tx_ring->qcp_rd_p; + else + todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + + while (todo--) { + idx = tx_ring->rd_p & (tx_ring->cnt - 1); + tx_ring->rd_p++; + + if (!tx_ring->txbufs[idx].frag) + continue; + + nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr, + nn->fl_bufsz, DMA_BIDIRECTIONAL); + __free_page(virt_to_page(tx_ring->txbufs[idx].frag)); + + done_pkts++; + done_bytes += tx_ring->txbufs[idx].real_len; + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].frag = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + /** * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers * @nn: NFP Net device @@ -996,39 +1019,47 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) static void nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring) { + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; const struct skb_frag_struct *frag; - struct netdev_queue *nd_q; struct pci_dev *pdev = nn->pdev; + struct netdev_queue *nd_q; while (tx_ring->rd_p != tx_ring->wr_p) { - int nr_frags, fidx, idx; - struct sk_buff *skb; + struct nfp_net_tx_buf *tx_buf; + int idx; - idx = tx_ring->rd_p % tx_ring->cnt; - skb = tx_ring->txbufs[idx].skb; - nr_frags = skb_shinfo(skb)->nr_frags; - fidx = tx_ring->txbufs[idx].fidx; + idx = tx_ring->rd_p & (tx_ring->cnt - 1); + tx_buf = &tx_ring->txbufs[idx]; - if (fidx == -1) { - /* unmap head */ - dma_unmap_single(&pdev->dev, - tx_ring->txbufs[idx].dma_addr, - skb_headlen(skb), DMA_TO_DEVICE); + if (tx_ring == r_vec->xdp_ring) { + nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr, + nn->fl_bufsz, DMA_BIDIRECTIONAL); + __free_page(virt_to_page(tx_ring->txbufs[idx].frag)); } else { - /* unmap fragment */ - frag = &skb_shinfo(skb)->frags[fidx]; - dma_unmap_page(&pdev->dev, - tx_ring->txbufs[idx].dma_addr, - skb_frag_size(frag), DMA_TO_DEVICE); - } + struct sk_buff *skb = tx_ring->txbufs[idx].skb; + int nr_frags = skb_shinfo(skb)->nr_frags; + + if (tx_buf->fidx == -1) { + /* unmap head */ + dma_unmap_single(&pdev->dev, tx_buf->dma_addr, + skb_headlen(skb), + DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; + dma_unmap_page(&pdev->dev, tx_buf->dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } - /* check for last gather fragment */ - if (fidx == nr_frags - 1) - dev_kfree_skb_any(skb); + /* check for last gather fragment */ + if (tx_buf->fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + } - tx_ring->txbufs[idx].dma_addr = 0; - tx_ring->txbufs[idx].skb = NULL; - tx_ring->txbufs[idx].fidx = -2; + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; tx_ring->qcp_rd_p++; tx_ring->rd_p++; @@ -1040,6 +1071,9 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring) tx_ring->qcp_rd_p = 0; tx_ring->wr_ptr_add = 0; + if (tx_ring == r_vec->xdp_ring) + return; + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); netdev_tx_reset_queue(nd_q); } @@ -1049,7 +1083,7 @@ static void nfp_net_tx_timeout(struct net_device *netdev) struct nfp_net *nn = netdev_priv(netdev); int i; - for (i = 0; i < nn->num_tx_rings; i++) { + for (i = 0; i < nn->netdev->real_num_tx_queues; i++) { if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) continue; nn_warn(nn, "TX timeout on ring: %d\n", i); @@ -1059,69 +1093,112 @@ static void nfp_net_tx_timeout(struct net_device *netdev) /* Receive processing */ +static unsigned int +nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu) +{ + unsigned int fl_bufsz; -/** - * nfp_net_rx_space() - return the number of free slots on the RX ring - * @rx_ring: RX ring structure - * - * Make sure we leave at least one slot free. - * - * Return: True if there is space on the RX ring - */ -static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + fl_bufsz += NFP_NET_MAX_PREPEND; + else + fl_bufsz += nn->rx_offset; + fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu; + + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); + fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + return fl_bufsz; +} + +static void +nfp_net_free_frag(void *frag, bool xdp) { - return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); + if (!xdp) + skb_free_frag(frag); + else + __free_page(virt_to_page(frag)); } /** - * nfp_net_rx_alloc_one() - Allocate and map skb for RX + * nfp_net_rx_alloc_one() - Allocate and map page frag for RX * @rx_ring: RX ring structure of the skb * @dma_addr: Pointer to storage for DMA address (output param) * @fl_bufsz: size of freelist buffers + * @xdp: Whether XDP is enabled * - * This function will allcate a new skb, map it for DMA. + * This function will allcate a new page frag, map it for DMA. * - * Return: allocated skb or NULL on failure. + * Return: allocated page frag or NULL on failure. */ -static struct sk_buff * +static void * nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr, - unsigned int fl_bufsz) + unsigned int fl_bufsz, bool xdp) { struct nfp_net *nn = rx_ring->r_vec->nfp_net; - struct sk_buff *skb; + int direction; + void *frag; - skb = netdev_alloc_skb(nn->netdev, fl_bufsz); - if (!skb) { - nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); + if (!xdp) + frag = netdev_alloc_frag(fl_bufsz); + else + frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD)); + if (!frag) { + nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); return NULL; } - *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, - fl_bufsz, DMA_FROM_DEVICE); + direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction); if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { - dev_kfree_skb_any(skb); + nfp_net_free_frag(frag, xdp); nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); return NULL; } - return skb; + return frag; +} + +static void * +nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr) +{ + void *frag; + + if (!nn->xdp_prog) + frag = napi_alloc_frag(nn->fl_bufsz); + else + frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD)); + if (!frag) { + nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); + return NULL; + } + + *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction); + if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { + nfp_net_free_frag(frag, nn->xdp_prog); + nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; } /** * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings * @rx_ring: RX ring structure - * @skb: Skb to put on rings + * @frag: page fragment buffer * @dma_addr: DMA address of skb mapping */ static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, - struct sk_buff *skb, dma_addr_t dma_addr) + void *frag, dma_addr_t dma_addr) { unsigned int wr_idx; - wr_idx = rx_ring->wr_p % rx_ring->cnt; + wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1); /* Stash SKB and DMA address away */ - rx_ring->rxbufs[wr_idx].skb = skb; + rx_ring->rxbufs[wr_idx].frag = frag; rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; /* Fill freelist descriptor */ @@ -1153,12 +1230,12 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) unsigned int wr_idx, last_idx; /* Move the empty entry to the end of the list */ - wr_idx = rx_ring->wr_p % rx_ring->cnt; + wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1); last_idx = rx_ring->cnt - 1; rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; - rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb; + rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; rx_ring->rxbufs[last_idx].dma_addr = 0; - rx_ring->rxbufs[last_idx].skb = NULL; + rx_ring->rxbufs[last_idx].frag = NULL; memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt); rx_ring->wr_p = 0; @@ -1170,15 +1247,17 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring * @nn: NFP Net device * @rx_ring: RX ring to remove buffers from + * @xdp: Whether XDP is enabled * * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1) * entries. After device is disabled nfp_net_rx_ring_reset() must be called * to restore required ring geometry. */ static void -nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) +nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + bool xdp) { - struct pci_dev *pdev = nn->pdev; + int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; unsigned int i; for (i = 0; i < rx_ring->cnt - 1; i++) { @@ -1186,14 +1265,14 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) * fails to allocate enough buffers and calls here to free * already allocated ones. */ - if (!rx_ring->rxbufs[i].skb) + if (!rx_ring->rxbufs[i].frag) continue; - dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr, - rx_ring->bufsz, DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_ring->rxbufs[i].skb); + nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr, + rx_ring->bufsz, direction); + nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp); rx_ring->rxbufs[i].dma_addr = 0; - rx_ring->rxbufs[i].skb = NULL; + rx_ring->rxbufs[i].frag = NULL; } } @@ -1201,9 +1280,11 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW) * @nn: NFP Net device * @rx_ring: RX ring to remove buffers from + * @xdp: Whether XDP is enabled */ static int -nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) +nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + bool xdp) { struct nfp_net_rx_buf *rxbufs; unsigned int i; @@ -1211,11 +1292,11 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) rxbufs = rx_ring->rxbufs; for (i = 0; i < rx_ring->cnt - 1; i++) { - rxbufs[i].skb = + rxbufs[i].frag = nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr, - rx_ring->bufsz); - if (!rxbufs[i].skb) { - nfp_net_rx_ring_bufs_free(nn, rx_ring); + rx_ring->bufsz, xdp); + if (!rxbufs[i].frag) { + nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp); return -ENOMEM; } } @@ -1232,7 +1313,7 @@ static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) unsigned int i; for (i = 0; i < rx_ring->cnt - 1; i++) - nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb, + nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag, rx_ring->rxbufs[i].dma_addr); } @@ -1359,6 +1440,87 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, return data; } +static void +nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. + */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); + if (rxbuf) + nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + +static void +nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off, + unsigned int pkt_len) +{ + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_desc *txd; + dma_addr_t new_dma_addr; + void *new_frag; + int wr_idx; + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); + return; + } + + new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); + return; + } + nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); + + wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->frag = rxbuf->frag; + txbuf->dma_addr = rxbuf->dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = pkt_len; + + dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, + pkt_len, DMA_TO_DEVICE); + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->l4_offset = 0; + + tx_ring->wr_p++; + tx_ring->wr_ptr_add++; +} + +static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len) +{ + struct xdp_buff xdp; + + xdp.data = data; + xdp.data_end = data + len; + + return BPF_PROG_RUN(prog, (void *)&xdp); +} + /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1368,62 +1530,39 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, * more cleanly separate packet receive code from other bookkeeping * functions performed in the napi poll function. * - * There are differences between the NFP-3200 firmware and the - * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue - * to indicate that new packets have arrived. The NFP-6000 does not - * have this queue and uses the DD bit in the RX descriptor. This - * method cannot be used on the NFP-3200 as it causes a race - * condition: The RX ring write pointer on the NFP-3200 is updated - * after packets (and descriptors) have been DMAed. If the DD bit is - * used and subsequently the read pointer is updated this may lead to - * the RX queue to underflow (if the firmware has not yet update the - * write pointer). Therefore we use slightly ugly conditional code - * below to handle the differences. We may, in the future update the - * NFP-3200 firmware to behave the same as the firmware on the - * NFP-6000. - * * Return: Number of packets received. */ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) { struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; - unsigned int data_len, meta_len; - int avail = 0, pkts_polled = 0; - struct sk_buff *skb, *new_skb; - struct nfp_net_rx_desc *rxd; - dma_addr_t new_dma_addr; - u32 qcp_wr_p; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + unsigned int true_bufsz; + struct sk_buff *skb; + int pkts_polled = 0; + int rx_dma_map_dir; int idx; - if (nn->is_nfp3200) { - /* Work out how many packets arrived */ - qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); - idx = rx_ring->rd_p % rx_ring->cnt; - - if (qcp_wr_p == idx) - /* No new packets */ - return 0; + rcu_read_lock(); + xdp_prog = READ_ONCE(nn->xdp_prog); + rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz; + tx_ring = r_vec->xdp_ring; - if (qcp_wr_p > idx) - avail = qcp_wr_p - idx; - else - avail = qcp_wr_p + rx_ring->cnt - idx; - } else { - avail = budget + 1; - } + while (pkts_polled < budget) { + unsigned int meta_len, data_len, data_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + void *new_frag; - while (avail > 0 && pkts_polled < budget) { - idx = rx_ring->rd_p % rx_ring->cnt; + idx = rx_ring->rd_p & (rx_ring->cnt - 1); rxd = &rx_ring->rxds[idx]; - if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { - if (nn->is_nfp3200) - nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", - rx_ring->idx, idx, - rxd->vals[0], rxd->vals[1]); + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) break; - } + /* Memory barrier to ensure that we won't do other reads * before the DD bit. */ @@ -1431,27 +1570,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) rx_ring->rd_p++; pkts_polled++; - avail--; - - skb = rx_ring->rxbufs[idx].skb; - - new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, - nn->fl_bufsz); - if (!new_skb) { - nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, - rx_ring->rxbufs[idx].dma_addr); - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_drops++; - u64_stats_update_end(&r_vec->rx_sync); - continue; - } - - dma_unmap_single(&nn->pdev->dev, - rx_ring->rxbufs[idx].dma_addr, - nn->fl_bufsz, DMA_FROM_DEVICE); - - nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + rxbuf = &rx_ring->rxbufs[idx]; /* < meta_len > * <-- [rx_offset] --> * --------------------------------------------------------- @@ -1466,19 +1586,66 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) */ meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - skb_reserve(skb, meta_len); + pkt_off = meta_len; else - skb_reserve(skb, nn->rx_offset); - skb_put(skb, data_len - meta_len); + pkt_off = nn->rx_offset; + data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off; /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; - r_vec->rx_bytes += skb->len; + r_vec->rx_bytes += pkt_len; u64_stats_update_end(&r_vec->rx_sync); + if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && + nn->bpf_offload_xdp)) { + int act; + + dma_sync_single_for_cpu(&nn->pdev->dev, + rxbuf->dma_addr + pkt_off, + pkt_len, DMA_FROM_DEVICE); + act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off, + pkt_len); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + nfp_net_tx_xdp_buf(nn, rx_ring, tx_ring, rxbuf, + pkt_off, pkt_len); + continue; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + nfp_net_rx_give_one(rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir, + &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz, + rx_dma_map_dir); + + nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, data_off); + skb_put(skb, pkt_len); + if (nn->fw_ver.major <= 3) { nfp_net_set_hash_desc(nn->netdev, skb, rxd); } else if (meta_len) { @@ -1486,12 +1653,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) end = nfp_net_parse_meta(nn->netdev, skb, meta_len); if (unlikely(end != skb->data)) { - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_drops++; - u64_stats_update_end(&r_vec->rx_sync); - - dev_kfree_skb_any(skb); nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + nfp_net_rx_drop(r_vec, rx_ring, NULL, skb); continue; } } @@ -1508,8 +1671,9 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) napi_gro_receive(&rx_ring->r_vec->napi, skb); } - if (nn->is_nfp3200) - nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); + if (xdp_prog && tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + rcu_read_unlock(); return pkts_polled; } @@ -1525,21 +1689,19 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) { struct nfp_net_r_vector *r_vec = container_of(napi, struct nfp_net_r_vector, napi); - struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; - struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; - struct nfp_net *nn = r_vec->nfp_net; - struct netdev_queue *txq; - unsigned int pkts_polled; - - tx_ring = &nn->tx_rings[rx_ring->idx]; - txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); - nfp_net_tx_complete(tx_ring); + unsigned int pkts_polled = 0; - pkts_polled = nfp_net_rx(rx_ring, budget); + if (r_vec->tx_ring) + nfp_net_tx_complete(r_vec->tx_ring); + if (r_vec->rx_ring) { + pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); + if (r_vec->xdp_ring) + nfp_net_xdp_complete(r_vec->xdp_ring); + } if (pkts_polled < budget) { napi_complete_done(napi, pkts_polled); - nfp_net_irq_unmask(nn, r_vec->irq_idx); + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_idx); } return pkts_polled; @@ -1575,10 +1737,12 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring * @tx_ring: TX Ring structure to allocate * @cnt: Ring buffer count + * @is_xdp: True if ring will be used for XDP * * Return: 0 on success, negative errno otherwise. */ -static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt) +static int +nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; @@ -1598,11 +1762,14 @@ static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt) if (!tx_ring->txbufs) goto err_alloc; - netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); + if (!is_xdp) + netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, + tx_ring->idx); - nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", + nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n", tx_ring->idx, tx_ring->qcidx, - tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); + tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds, + is_xdp ? "XDP" : ""); return 0; @@ -1612,23 +1779,29 @@ err_alloc: } static struct nfp_net_tx_ring * -nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt) +nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s, + unsigned int num_stack_tx_rings) { struct nfp_net_tx_ring *rings; unsigned int r; - rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL); + rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL); if (!rings) return NULL; - for (r = 0; r < nn->num_tx_rings; r++) { - nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r); + for (r = 0; r < s->n_rings; r++) { + int bias = 0; + + if (r >= num_stack_tx_rings) + bias = num_stack_tx_rings; + + nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r); - if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt)) + if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias)) goto err_free_prev; } - return rings; + return s->rings = rings; err_free_prev: while (r--) @@ -1637,28 +1810,27 @@ err_free_prev: return NULL; } -static struct nfp_net_tx_ring * -nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings) +static void +nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s) { - struct nfp_net_tx_ring *old = nn->tx_rings; - unsigned int r; + struct nfp_net_ring_set new = *s; - for (r = 0; r < nn->num_tx_rings; r++) - old[r].r_vec->tx_ring = &rings[r]; + s->dcnt = nn->txd_cnt; + s->rings = nn->tx_rings; + s->n_rings = nn->num_tx_rings; - nn->tx_rings = rings; - return old; + nn->txd_cnt = new.dcnt; + nn->tx_rings = new.rings; + nn->num_tx_rings = new.n_rings; } static void -nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings) +nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s) { + struct nfp_net_tx_ring *rings = s->rings; unsigned int r; - if (!rings) - return; - - for (r = 0; r < nn->num_tx_rings; r++) + for (r = 0; r < s->n_rings; r++) nfp_net_tx_ring_free(&rings[r]); kfree(rings); @@ -1730,31 +1902,32 @@ err_alloc: } static struct nfp_net_rx_ring * -nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz, - u32 buf_cnt) +nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s, + bool xdp) { + unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu); struct nfp_net_rx_ring *rings; unsigned int r; - rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL); + rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL); if (!rings) return NULL; - for (r = 0; r < nn->num_rx_rings; r++) { - nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r); + for (r = 0; r < s->n_rings; r++) { + nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r); - if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt)) + if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt)) goto err_free_prev; - if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r])) + if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp)) goto err_free_ring; } - return rings; + return s->rings = rings; err_free_prev: while (r--) { - nfp_net_rx_ring_bufs_free(nn, &rings[r]); + nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp); err_free_ring: nfp_net_rx_ring_free(&rings[r]); } @@ -1762,35 +1935,50 @@ err_free_ring: return NULL; } -static struct nfp_net_rx_ring * -nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings) +static void +nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s) { - struct nfp_net_rx_ring *old = nn->rx_rings; - unsigned int r; + struct nfp_net_ring_set new = *s; - for (r = 0; r < nn->num_rx_rings; r++) - old[r].r_vec->rx_ring = &rings[r]; + s->mtu = nn->netdev->mtu; + s->dcnt = nn->rxd_cnt; + s->rings = nn->rx_rings; + s->n_rings = nn->num_rx_rings; - nn->rx_rings = rings; - return old; + nn->netdev->mtu = new.mtu; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu); + nn->rxd_cnt = new.dcnt; + nn->rx_rings = new.rings; + nn->num_rx_rings = new.n_rings; } static void -nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings) +nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s, + bool xdp) { + struct nfp_net_rx_ring *rings = s->rings; unsigned int r; - if (!rings) - return; - - for (r = 0; r < nn->num_r_vecs; r++) { - nfp_net_rx_ring_bufs_free(nn, &rings[r]); + for (r = 0; r < s->n_rings; r++) { + nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp); nfp_net_rx_ring_free(&rings[r]); } kfree(rings); } +static void +nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + int idx) +{ + r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL; + r_vec->tx_ring = + idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL; + + r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ? + &nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL; +} + static int nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, int idx) @@ -1798,25 +1986,20 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx]; int err; - r_vec->tx_ring = &nn->tx_rings[idx]; - nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx); - - r_vec->rx_ring = &nn->rx_rings[idx]; - nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx); + /* Setup NAPI */ + netif_napi_add(nn->netdev, &r_vec->napi, + nfp_net_poll, NAPI_POLL_WEIGHT); snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nn->netdev->name, idx); err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec); if (err) { + netif_napi_del(&r_vec->napi); nn_err(nn, "Error requesting IRQ %d\n", entry->vector); return err; } disable_irq(entry->vector); - /* Setup NAPI */ - netif_napi_add(nn->netdev, &r_vec->napi, - nfp_net_poll, NAPI_POLL_WEIGHT); - irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, entry->vector, entry->entry); @@ -1879,13 +2062,13 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn) /* copy RX interrupt coalesce parameters */ value = (nn->rx_coalesce_max_frames << 16) | (factor * nn->rx_coalesce_usecs); - for (i = 0; i < nn->num_r_vecs; i++) + for (i = 0; i < nn->num_rx_rings; i++) nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); /* copy TX interrupt coalesce parameters */ value = (nn->tx_coalesce_max_frames << 16) | (factor * nn->tx_coalesce_usecs); - for (i = 0; i < nn->num_r_vecs; i++) + for (i = 0; i < nn->num_tx_rings; i++) nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); } @@ -1901,9 +2084,8 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn) { nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(nn->netdev->dev_addr)); - /* We can't do writew for NFP-3200 compatibility */ - nn_writel(nn, NFP_NET_CFG_MACADDR + 4, - get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); + nn_writew(nn, NFP_NET_CFG_MACADDR + 6, + get_unaligned_be16(nn->netdev->dev_addr + 4)); } static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) @@ -1944,27 +2126,33 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) if (err) nn_err(nn, "Could not disable device: %d\n", err); - for (r = 0; r < nn->num_r_vecs; r++) { - nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring); - nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring); + for (r = 0; r < nn->num_rx_rings; r++) + nfp_net_rx_ring_reset(&nn->rx_rings[r]); + for (r = 0; r < nn->num_tx_rings; r++) + nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]); + for (r = 0; r < nn->num_r_vecs; r++) nfp_net_vec_clear_ring_data(nn, r); - } nn->ctrl = new_ctrl; } static void -nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, - unsigned int idx) +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, unsigned int idx) { /* Write the DMA address, size and MSI-X info to the device */ - nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), r_vec->rx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(r_vec->rx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), r_vec->irq_idx); + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_idx); +} - nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), r_vec->tx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(r_vec->tx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx); +static void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_idx); } static int __nfp_net_set_config_and_enable(struct nfp_net *nn) @@ -1989,8 +2177,10 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) update |= NFP_NET_CFG_UPDATE_IRQMOD; } - for (r = 0; r < nn->num_r_vecs; r++) - nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r); + for (r = 0; r < nn->num_tx_rings; r++) + nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r); + for (r = 0; r < nn->num_rx_rings; r++) + nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r); nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); @@ -2016,8 +2206,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nn->ctrl = new_ctrl; - for (r = 0; r < nn->num_r_vecs; r++) - nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring); + for (r = 0; r < nn->num_rx_rings; r++) + nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]); /* Since reconfiguration requests while NFP is down are ignored we * have to wipe the entire VXLAN configuration and reinitialize it. @@ -2068,6 +2258,15 @@ static void nfp_net_open_stack(struct nfp_net *nn) static int nfp_net_netdev_open(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = nn->txd_cnt, + }; int err, r; if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { @@ -2092,39 +2291,29 @@ static int nfp_net_netdev_open(struct net_device *netdev) goto err_free_exn; disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); - nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), - GFP_KERNEL); + for (r = 0; r < nn->num_r_vecs; r++) { + err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); + if (err) + goto err_cleanup_vec_p; + } + + nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog); if (!nn->rx_rings) { err = -ENOMEM; - goto err_free_lsc; + goto err_cleanup_vec; } - nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), - GFP_KERNEL); + + nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx, + nn->num_stack_tx_rings); if (!nn->tx_rings) { err = -ENOMEM; goto err_free_rx_rings; } - for (r = 0; r < nn->num_r_vecs; r++) { - err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); - if (err) - goto err_free_prev_vecs; - - err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt); - if (err) - goto err_cleanup_vec_p; + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r); - err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring, - nn->fl_bufsz, nn->rxd_cnt); - if (err) - goto err_free_tx_ring_p; - - err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring); - if (err) - goto err_flush_rx_ring_p; - } - - err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); + err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings); if (err) goto err_free_rings; @@ -2154,21 +2343,14 @@ static int nfp_net_netdev_open(struct net_device *netdev) return 0; err_free_rings: + nfp_net_tx_ring_set_free(nn, &tx); +err_free_rx_rings: + nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog); +err_cleanup_vec: r = nn->num_r_vecs; -err_free_prev_vecs: - while (r--) { - nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); -err_flush_rx_ring_p: - nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); -err_free_tx_ring_p: - nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); err_cleanup_vec_p: + while (r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - } - kfree(nn->tx_rings); -err_free_rx_rings: - kfree(nn->rx_rings); -err_free_lsc: nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); err_free_exn: nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); @@ -2203,12 +2385,14 @@ static void nfp_net_close_free_all(struct nfp_net *nn) { unsigned int r; - for (r = 0; r < nn->num_r_vecs; r++) { - nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); - nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); - nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); - nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + for (r = 0; r < nn->num_rx_rings; r++) { + nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog); + nfp_net_rx_ring_free(&nn->rx_rings[r]); } + for (r = 0; r < nn->num_tx_rings; r++) + nfp_net_tx_ring_free(&nn->tx_rings[r]); + for (r = 0; r < nn->num_r_vecs; r++) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); kfree(nn->rx_rings); kfree(nn->tx_rings); @@ -2271,94 +2455,137 @@ static void nfp_net_set_rx_mode(struct net_device *netdev) nn->ctrl = new_ctrl; } -static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +static void nfp_net_rss_init_itbl(struct nfp_net *nn) { - unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz; - struct nfp_net *nn = netdev_priv(netdev); - struct nfp_net_rx_ring *tmp_rings; - int err; - - if (new_mtu < 68 || new_mtu > nn->max_mtu) { - nn_err(nn, "New MTU (%d) is not valid\n", new_mtu); - return -EINVAL; - } - - old_mtu = netdev->mtu; - old_fl_bufsz = nn->fl_bufsz; - new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu; - - if (!netif_running(netdev)) { - netdev->mtu = new_mtu; - nn->fl_bufsz = new_fl_bufsz; - return 0; - } + int i; - /* Prepare new rings */ - tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz, - nn->rxd_cnt); - if (!tmp_rings) - return -ENOMEM; + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->num_rx_rings); +} - /* Stop device, swap in new rings, try to start the firmware */ - nfp_net_close_stack(nn); - nfp_net_clear_config_and_disable(nn); +static int +nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs, + unsigned int *stack_tx_rings, + struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, + struct nfp_net_ring_set *tx) +{ + unsigned int r; + int err; - tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings); + if (rx) + nfp_net_rx_ring_set_swap(nn, rx); + if (tx) + nfp_net_tx_ring_set_swap(nn, tx); - netdev->mtu = new_mtu; - nn->fl_bufsz = new_fl_bufsz; + swap(*num_vecs, nn->num_r_vecs); + swap(*stack_tx_rings, nn->num_stack_tx_rings); + *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog); - err = nfp_net_set_config_and_enable(nn); - if (err) { - const int err_new = err; + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r); - /* Try with old configuration and old rings */ - tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings); + if (nn->netdev->real_num_rx_queues != nn->num_rx_rings) { + if (!netif_is_rxfh_configured(nn->netdev)) + nfp_net_rss_init_itbl(nn); - netdev->mtu = old_mtu; - nn->fl_bufsz = old_fl_bufsz; + err = netif_set_real_num_rx_queues(nn->netdev, + nn->num_rx_rings); + if (err) + return err; + } - err = __nfp_net_set_config_and_enable(nn); + if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) { + err = netif_set_real_num_tx_queues(nn->netdev, + nn->num_stack_tx_rings); if (err) - nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n", - err_new, err); + return err; } - nfp_net_shadow_rx_rings_free(nn, tmp_rings); + return __nfp_net_set_config_and_enable(nn); +} - nfp_net_open_stack(nn); +static int +nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx) +{ + /* XDP-enabled tests */ + if (!xdp_prog) + return 0; + if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) { + nn_warn(nn, "MTU too large w/ XDP enabled\n"); + return -EINVAL; + } + if (tx && tx->n_rings > nn->max_tx_rings) { + nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n"); + return -EINVAL; + } - return err; + return 0; } -int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) -{ - struct nfp_net_tx_ring *tx_rings = NULL; - struct nfp_net_rx_ring *rx_rings = NULL; - u32 old_rxd_cnt, old_txd_cnt; +static void +nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, + struct nfp_net_ring_set *tx, + unsigned int stack_tx_rings, unsigned int num_vecs) +{ + nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu); + nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt; + nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt; + nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings; + nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings; + nn->num_stack_tx_rings = stack_tx_rings; + nn->num_r_vecs = num_vecs; + *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog); + + if (!netif_is_rxfh_configured(nn->netdev)) + nfp_net_rss_init_itbl(nn); +} + +int +nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx) +{ + unsigned int stack_tx_rings, num_vecs, r; int err; + stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings; + if (*xdp_prog) + stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings; + + num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings); + + err = nfp_net_check_config(nn, *xdp_prog, rx, tx); + if (err) + return err; + if (!netif_running(nn->netdev)) { - nn->rxd_cnt = rxd_cnt; - nn->txd_cnt = txd_cnt; + nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx, + stack_tx_rings, num_vecs); return 0; } - old_rxd_cnt = nn->rxd_cnt; - old_txd_cnt = nn->txd_cnt; - /* Prepare new rings */ - if (nn->rxd_cnt != rxd_cnt) { - rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz, - rxd_cnt); - if (!rx_rings) - return -ENOMEM; + for (r = nn->num_r_vecs; r < num_vecs; r++) { + err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); + if (err) { + num_vecs = r; + goto err_cleanup_vecs; + } } - if (nn->txd_cnt != txd_cnt) { - tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt); - if (!tx_rings) { - nfp_net_shadow_rx_rings_free(nn, rx_rings); - return -ENOMEM; + if (rx) { + if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) { + err = -ENOMEM; + goto err_cleanup_vecs; + } + } + if (tx) { + if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) { + err = -ENOMEM; + goto err_free_rx; } } @@ -2366,39 +2593,51 @@ int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) nfp_net_close_stack(nn); nfp_net_clear_config_and_disable(nn); - if (rx_rings) - rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings); - if (tx_rings) - tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings); - - nn->rxd_cnt = rxd_cnt; - nn->txd_cnt = txd_cnt; - - err = nfp_net_set_config_and_enable(nn); + err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings, + xdp_prog, rx, tx); if (err) { - const int err_new = err; - - /* Try with old configuration and old rings */ - if (rx_rings) - rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings); - if (tx_rings) - tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings); + int err2; - nn->rxd_cnt = old_rxd_cnt; - nn->txd_cnt = old_txd_cnt; + nfp_net_clear_config_and_disable(nn); - err = __nfp_net_set_config_and_enable(nn); - if (err) + /* Try with old configuration and old rings */ + err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings, + xdp_prog, rx, tx); + if (err2) nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n", - err_new, err); + err, err2); } + for (r = num_vecs - 1; r >= nn->num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - nfp_net_shadow_rx_rings_free(nn, rx_rings); - nfp_net_shadow_tx_rings_free(nn, tx_rings); + if (rx) + nfp_net_rx_ring_set_free(nn, rx, *xdp_prog); + if (tx) + nfp_net_tx_ring_set_free(nn, tx); nfp_net_open_stack(nn); return err; + +err_free_rx: + if (rx) + nfp_net_rx_ring_set_free(nn, rx, *xdp_prog); +err_cleanup_vecs: + for (r = num_vecs - 1; r >= nn->num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + return err; +} + +static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = new_mtu, + .dcnt = nn->rxd_cnt, + }; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL); } static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, @@ -2455,8 +2694,12 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, if (proto != htons(ETH_P_ALL)) return -ENOTSUPP; - if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) - return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) { + if (!nn->bpf_offload_xdp) + return nfp_net_bpf_offload(nn, tc->cls_bpf); + else + return -EBUSY; + } return -EINVAL; } @@ -2664,6 +2907,87 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev, nfp_net_set_vxlan_port(nn, idx, 0); } +static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct tc_cls_bpf_offload cmd = { + .prog = prog, + }; + int ret; + + if (!nfp_net_ebpf_capable(nn)) + return -EINVAL; + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + if (!nn->bpf_offload_xdp) + return prog ? -EBUSY : 0; + cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY; + } else { + if (!prog) + return 0; + cmd.command = TC_CLSBPF_ADD; + } + + ret = nfp_net_bpf_offload(nn, &cmd); + /* Stop offload if replace not possible */ + if (ret && cmd.command == TC_CLSBPF_REPLACE) + nfp_net_xdp_offload(nn, NULL); + nn->bpf_offload_xdp = prog && !ret; + return ret; +} + +static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = nn->txd_cnt, + }; + int err; + + if (!prog && !nn->xdp_prog) + return 0; + if (prog && nn->xdp_prog) { + prog = xchg(&nn->xdp_prog, prog); + bpf_prog_put(prog); + nfp_net_xdp_offload(nn, nn->xdp_prog); + return 0; + } + + tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings; + + /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */ + err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx); + if (err) + return err; + + /* @prog got swapped and is now the old one */ + if (prog) + bpf_prog_put(prog); + + nfp_net_xdp_offload(nn, nn->xdp_prog); + + return 0; +} + +static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nfp_net_xdp_setup(nn, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!nn->xdp_prog; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops nfp_net_netdev_ops = { .ndo_open = nfp_net_netdev_open, .ndo_stop = nfp_net_netdev_close, @@ -2678,6 +3002,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_features_check = nfp_net_features_check, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, + .ndo_xdp = nfp_net_xdp, }; /** @@ -2686,8 +3011,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { */ void nfp_net_info(struct nfp_net *nn) { - nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", - nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx", + nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", nn->is_vf ? "VF " : "", nn->num_tx_rings, nn->max_tx_rings, nn->num_rx_rings, nn->max_rx_rings); @@ -2728,11 +3052,11 @@ void nfp_net_info(struct nfp_net *nn) * Return: NFP Net device structure, or ERR_PTR on error. */ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, - int max_tx_rings, int max_rx_rings) + unsigned int max_tx_rings, + unsigned int max_rx_rings) { struct net_device *netdev; struct nfp_net *nn; - int nqs; netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), max_tx_rings, max_rx_rings); @@ -2748,9 +3072,12 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->max_tx_rings = max_tx_rings; nn->max_rx_rings = max_rx_rings; - nqs = netif_get_num_default_rss_queues(); - nn->num_tx_rings = min_t(int, nqs, max_tx_rings); - nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus()); + nn->num_rx_rings = min_t(unsigned int, max_rx_rings, + netif_get_num_default_rss_queues()); + + nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings); + nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus()); nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; @@ -2782,13 +3109,9 @@ void nfp_net_netdev_free(struct nfp_net *nn) */ static void nfp_net_rss_init(struct nfp_net *nn) { - int i; - netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); - for (i = 0; i < sizeof(nn->rss_itbl); i++) - nn->rss_itbl[i] = - ethtool_rxfh_indir_default(i, nn->num_rx_rings); + nfp_net_rss_init_itbl(nn); /* Enable IPv4/IPv6 TCP by default */ nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | @@ -2826,12 +3149,18 @@ int nfp_net_netdev_init(struct net_device *netdev) nfp_net_write_mac_addr(nn); + /* Determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) + nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + else + nn->rx_offset = NFP_NET_RX_OFFSET; + /* Set default MTU and Freelist buffer size */ if (nn->max_mtu < NFP_NET_DEFAULT_MTU) netdev->mtu = nn->max_mtu; else netdev->mtu = NFP_NET_DEFAULT_MTU; - nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu); /* Advertise/enable offloads based on capabilities * @@ -2902,18 +3231,6 @@ int nfp_net_netdev_init(struct net_device *netdev) nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } - /* On NFP-3200 enable MSI-X auto-masking, if supported and the - * interrupts are not shared. - */ - if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO) - nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO; - - /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */ - if (nn->fw_ver.major >= 2) - nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); - else - nn->rx_offset = NFP_NET_RX_OFFSET; - /* Stash the re-configuration queue away. First odd queue in TX Bar */ nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; @@ -2927,9 +3244,13 @@ int nfp_net_netdev_init(struct net_device *netdev) return err; /* Finalise the netdev setup */ - ether_setup(netdev); netdev->netdev_ops = &nfp_net_netdev_ops; netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = nn->max_mtu; + netif_carrier_off(netdev); nfp_net_set_ethtool_ops(netdev); @@ -2944,5 +3265,11 @@ int nfp_net_netdev_init(struct net_device *netdev) */ void nfp_net_netdev_clean(struct net_device *netdev) { - unregister_netdev(netdev); + struct nfp_net *nn = netdev_priv(netdev); + + if (nn->xdp_prog) + bpf_prog_put(nn->xdp_prog); + if (nn->bpf_offload_xdp) + nfp_net_xdp_offload(nn, NULL); + unregister_netdev(nn->netdev); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 93b10b441acb..385ba355c965 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -50,7 +50,7 @@ /** * Configuration BAR size. * - * The configuration BAR is 8K in size, but on the NFP6000, due to + * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. */ #define NFP_NET_CFG_BAR_SZ (32 * 1024) @@ -186,18 +186,13 @@ #define NFP_NET_CFG_START_RXQ 0x004c /** - * NFP-3200 workaround (0x0050 - 0x0058) - * @NFP_NET_CFG_SPARE_ADDR: DMA address for ME code to use (e.g. YDS-155 fix) - */ -#define NFP_NET_CFG_SPARE_ADDR 0x0050 -/** - * NFP6000/NFP4000 - Prepend configuration + * Prepend configuration */ #define NFP_NET_CFG_RX_OFFSET 0x0050 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ /** - * NFP6000/NFP4000 - VXLAN/UDP encap configuration + * VXLAN/UDP encap configuration * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ @@ -205,7 +200,7 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * NFP6000 - BPF section + * BPF section * @NFP_NET_CFG_BPF_ABI: BPF ABI version * @NFP_NET_CFG_BPF_CAP: BPF capabilities * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index f7c9a5bc4aa3..c66f3f954aa8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -44,8 +44,8 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) struct nfp_net_r_vector *r_vec = file->private; struct nfp_net_rx_ring *rx_ring; struct nfp_net_rx_desc *rxd; - struct sk_buff *skb; struct nfp_net *nn; + void *frag; int i; rtnl_lock(); @@ -73,10 +73,9 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) seq_printf(file, "%04d: 0x%08x 0x%08x", i, rxd->vals[0], rxd->vals[1]); - skb = READ_ONCE(rx_ring->rxbufs[i].skb); - if (skb) - seq_printf(file, " skb->head=%p skb->data=%p", - skb->head, skb->data); + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); if (rx_ring->rxbufs[i].dma_addr) seq_printf(file, " dma_addr=%pad", @@ -115,6 +114,16 @@ static const struct file_operations nfp_rx_q_fops = { .llseek = seq_lseek }; +static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f); + +static const struct file_operations nfp_tx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_tx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) { struct nfp_net_r_vector *r_vec = file->private; @@ -127,10 +136,13 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) rtnl_lock(); - if (!r_vec->nfp_net || !r_vec->tx_ring) + if (debugfs_real_fops(file->file) == &nfp_tx_q_fops) + tx_ring = r_vec->tx_ring; + else + tx_ring = r_vec->xdp_ring; + if (!r_vec->nfp_net || !tx_ring) goto out; nn = r_vec->nfp_net; - tx_ring = r_vec->tx_ring; if (!netif_running(nn->netdev)) goto out; @@ -149,9 +161,14 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) txd->vals[2], txd->vals[3]); skb = READ_ONCE(tx_ring->txbufs[i].skb); - if (skb) - seq_printf(file, " skb->head=%p skb->data=%p", - skb->head, skb->data); + if (skb) { + if (tx_ring == r_vec->tx_ring) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + else + seq_printf(file, " frag=%p", skb); + } + if (tx_ring->txbufs[i].dma_addr) seq_printf(file, " dma_addr=%pad", &tx_ring->txbufs[i].dma_addr); @@ -177,7 +194,7 @@ static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f) return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private); } -static const struct file_operations nfp_tx_q_fops = { +static const struct file_operations nfp_xdp_q_fops = { .owner = THIS_MODULE, .open = nfp_net_debugfs_tx_q_open, .release = single_release, @@ -187,7 +204,7 @@ static const struct file_operations nfp_tx_q_fops = { void nfp_net_debugfs_adapter_add(struct nfp_net *nn) { - struct dentry *queues, *tx, *rx; + struct dentry *queues, *tx, *rx, *xdp; char int_name[16]; int i; @@ -205,16 +222,19 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn) rx = debugfs_create_dir("rx", queues); tx = debugfs_create_dir("tx", queues); - if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx)) + xdp = debugfs_create_dir("xdp", queues); + if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp)) return; - for (i = 0; i < nn->num_rx_rings; i++) { + for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) { sprintf(int_name, "%d", i); debugfs_create_file(int_name, S_IRUSR, rx, &nn->r_vecs[i], &nfp_rx_q_fops); + debugfs_create_file(int_name, S_IRUSR, xdp, + &nn->r_vecs[i], &nfp_xdp_q_fops); } - for (i = 0; i < nn->num_tx_rings; i++) { + for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) { sprintf(int_name, "%d", i); debugfs_create_file(int_name, S_IRUSR, tx, &nn->r_vecs[i], &nfp_tx_q_fops); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 3418f2277e9d..1b26e9646574 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -158,6 +158,28 @@ static void nfp_net_get_ringparam(struct net_device *netdev, ring->tx_pending = nn->txd_cnt; } +static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) +{ + struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL; + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = txd_cnt, + }; + + if (nn->rxd_cnt != rxd_cnt) + reconfig_rx = ℞ + if (nn->txd_cnt != txd_cnt) + reconfig_tx = &tx; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, + reconfig_rx, reconfig_tx); +} + static int nfp_net_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { @@ -614,6 +636,76 @@ static int nfp_net_set_coalesce(struct net_device *netdev, return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); } +static void nfp_net_get_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int num_tx_rings; + + num_tx_rings = nn->num_tx_rings; + if (nn->xdp_prog) + num_tx_rings -= nn->num_rx_rings; + + channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs); + channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs); + channel->max_combined = min(channel->max_rx, channel->max_tx); + channel->max_other = NFP_NET_NON_Q_VECTORS; + channel->combined_count = min(nn->num_rx_rings, num_tx_rings); + channel->rx_count = nn->num_rx_rings - channel->combined_count; + channel->tx_count = num_tx_rings - channel->combined_count; + channel->other_count = NFP_NET_NON_Q_VECTORS; +} + +static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx, + unsigned int total_tx) +{ + struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL; + struct nfp_net_ring_set rx = { + .n_rings = total_rx, + .mtu = nn->netdev->mtu, + .dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = total_tx, + .dcnt = nn->txd_cnt, + }; + + if (nn->num_rx_rings != total_rx) + reconfig_rx = ℞ + if (nn->num_stack_tx_rings != total_tx || + (nn->xdp_prog && reconfig_rx)) + reconfig_tx = &tx; + + /* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */ + if (nn->xdp_prog) + tx.n_rings += total_rx; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, + reconfig_rx, reconfig_tx); +} + +static int nfp_net_set_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int total_rx, total_tx; + + /* Reject unsupported */ + if (!channel->combined_count || + channel->other_count != NFP_NET_NON_Q_VECTORS || + (channel->rx_count && channel->tx_count)) + return -EINVAL; + + total_rx = channel->combined_count + channel->rx_count; + total_tx = channel->combined_count + channel->tx_count; + + if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) || + total_tx > min(nn->max_tx_rings, nn->max_r_vecs)) + return -EINVAL; + + return nfp_net_set_num_rings(nn, total_rx, total_tx); +} + static const struct ethtool_ops nfp_net_ethtool_ops = { .get_drvinfo = nfp_net_get_drvinfo, .get_link = ethtool_op_get_link, @@ -632,6 +724,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .get_regs = nfp_net_get_regs, .get_coalesce = nfp_net_get_coalesce, .set_coalesce = nfp_net_set_coalesce, + .get_channels = nfp_net_get_channels, + .set_channels = nfp_net_set_channels, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 8acfb631a0ea..18a851eb3508 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -111,6 +111,9 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) const struct tc_action *a; LIST_HEAD(actions); + if (!cls_bpf->exts) + return NN_ACT_XDP; + /* TC direct action */ if (cls_bpf->exts_integrated) { if (tc_no_actions(cls_bpf->exts)) @@ -128,7 +131,7 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) if (is_tcf_gact_shot(a)) return NN_ACT_TC_DROP; - if (is_tcf_mirred_redirect(a) && + if (is_tcf_mirred_egress_redirect(a) && tcf_mirred_ifindex(a) == nn->netdev->ifindex) return NN_ACT_TC_REDIR; } @@ -233,9 +236,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } -int -nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, - struct tc_cls_bpf_offload *cls_bpf) +int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) { struct nfp_bpf_result res; dma_addr_t dma_addr; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index 2800bbf65a89..d065235034d4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -63,9 +63,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn) u8 mac_addr[ETH_ALEN]; put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); - /* We can't do readw for NFP-3200 compatibility */ - put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16, - &mac_addr[4]); + put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]); if (!is_valid_ether_addr(mac_addr)) { eth_hw_addr_random(nn->netdev); @@ -86,7 +84,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, int tx_bar_no, rx_bar_no; u8 __iomem *ctrl_bar; struct nfp_net *nn; - int is_nfp3200; u32 startq; int stride; int err; @@ -101,15 +98,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - switch (pdev->device) { - case PCI_DEVICE_NFP6000VF: - is_nfp3200 = 0; - break; - default: - err = -ENODEV; - goto err_pci_regions; - } - pci_set_master(pdev); err = dma_set_mask_and_coherent(&pdev->dev, @@ -149,15 +137,9 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } else { switch (fw_ver.major) { case 1 ... 4: - if (is_nfp3200) { - stride = 2; - tx_bar_no = NFP_NET_Q0_BAR; - rx_bar_no = NFP_NET_Q1_BAR; - } else { - stride = 4; - tx_bar_no = NFP_NET_Q0_BAR; - rx_bar_no = tx_bar_no; - } + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; break; default: dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", @@ -189,20 +171,10 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; } - /* XXX Implement a workaround for THB-350 here. Ideally, we - * have a different PCI ID for A rev VFs. - */ - switch (pdev->device) { - case PCI_DEVICE_NFP6000VF: - startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); - tx_bar_off = NFP_PCIE_QUEUE(startq); - startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); - rx_bar_off = NFP_PCIE_QUEUE(startq); - break; - default: - err = -ENODEV; - goto err_ctrl_unmap; - } + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = NFP_PCIE_QUEUE(startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = NFP_PCIE_QUEUE(startq); /* Allocate and initialise the netdev */ nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings); @@ -214,7 +186,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, nn->fw_ver = fw_ver; nn->ctrl_bar = ctrl_bar; nn->is_vf = 1; - nn->is_nfp3200 = is_nfp3200; nn->stride_tx = stride; nn->stride_rx = stride; diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c index adbc47f2d132..df4188cb43e0 100644 --- a/drivers/net/ethernet/netx-eth.c +++ b/drivers/net/ethernet/netx-eth.c @@ -304,7 +304,6 @@ static const struct net_device_ops netx_eth_netdev_ops = { .ndo_start_xmit = netx_eth_hard_start_xmit, .ndo_tx_timeout = netx_eth_timeout, .ndo_set_rx_mode = netx_eth_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 712d8bcb7d8c..119f6dca71f0 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -915,7 +915,6 @@ static const struct net_device_ops w90p910_ether_netdev_ops = { .ndo_set_mac_address = w90p910_set_mac_address, .ndo_do_ioctl = w90p910_ether_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static void __init get_mac_address(struct net_device *dev) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 9b0d7f463ff3..3913f07279d2 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3008,17 +3008,12 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) struct fe_priv *np = netdev_priv(dev); int old_mtu; - if (new_mtu < 64 || new_mtu > np->pkt_limit) - return -EINVAL; - old_mtu = dev->mtu; dev->mtu = new_mtu; /* return early if the buffer sizes will not change */ if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN) return 0; - if (old_mtu == new_mtu) - return 0; /* synchronized against open : rtnl_lock() held by caller */ if (netif_running(dev)) { @@ -5719,6 +5714,10 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) /* Add loopback capability to the device. */ dev->hw_features |= NETIF_F_LOOPBACK; + /* MTU range: 64 - 1500 or 9100 */ + dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN; + dev->max_mtu = np->pkt_limit; + np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG; if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) || (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) || diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 8e13ec84c538..dd6b0d0f7fa5 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1256,7 +1256,6 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_do_ioctl = lpc_eth_ioctl, .ndo_set_mac_address = lpc_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int lpc_eth_drv_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3cd87a41ac92..d461f419948e 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2260,16 +2260,10 @@ static int pch_gbe_set_mac(struct net_device *netdev, void *addr) static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); - int max_frame; + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; unsigned long old_rx_buffer_len = adapter->rx_buffer_len; int err; - max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || - (max_frame > PCH_GBE_MAX_JUMBO_FRAME_SIZE)) { - netdev_err(netdev, "Invalid MTU setting\n"); - return -EINVAL; - } if (max_frame <= PCH_GBE_FRAME_SIZE_2048) adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_2048; else if (max_frame <= PCH_GBE_FRAME_SIZE_4096) @@ -2633,6 +2627,11 @@ static int pch_gbe_probe(struct pci_dev *pdev, netdev->features = netdev->hw_features; pch_gbe_set_ethtool_ops(netdev); + /* MTU range: 46 - 10300 */ + netdev->min_mtu = ETH_ZLEN - ETH_HLEN; + netdev->max_mtu = PCH_GBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + pch_gbe_mac_load_mac_addr(&adapter->hw); pch_gbe_mac_reset_hw(&adapter->hw); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 91be2f02ef1c..2d04679a923a 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -568,7 +568,6 @@ static const struct net_device_ops hamachi_netdev_ops = { .ndo_start_xmit = hamachi_start_xmit, .ndo_get_stats = hamachi_get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_tx_timeout = hamachi_tx_timeout, diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index fb1d1031b091..2a2ca5fa0c69 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -360,7 +360,6 @@ static const struct net_device_ops netdev_ops = { .ndo_stop = yellowfin_close, .ndo_start_xmit = yellowfin_start_xmit, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_do_ioctl = netdev_ioctl, diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 2f4a837f0d6a..badfa1d562a4 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -53,7 +53,7 @@ * - Multiqueue RX/TX */ -#define PE_MIN_MTU 64 +#define PE_MIN_MTU (ETH_ZLEN + ETH_HLEN) #define PE_MAX_MTU 9000 #define PE_DEF_MTU ETH_DATA_LEN @@ -1611,9 +1611,6 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) int running; int ret = 0; - if (new_mtu < PE_MIN_MTU || new_mtu > PE_MAX_MTU) - return -EINVAL; - running = netif_running(dev); if (running) { @@ -1635,7 +1632,7 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) } /* Setup checksum channels if large MTU and none already allocated */ - if (new_mtu > 1500 && !mac->num_cs) { + if (new_mtu > PE_DEF_MTU && !mac->num_cs) { pasemi_mac_setup_csrings(mac); if (!mac->num_cs) { ret = -ENOMEM; @@ -1757,6 +1754,11 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev->netdev_ops = &pasemi_netdev_ops; dev->mtu = PE_DEF_MTU; + + /* MTU range: 64 - 9000 */ + dev->min_mtu = PE_MIN_MTU; + dev->max_mtu = PE_MAX_MTU; + /* 1500 MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */ mac->bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + LOCAL_SKB_ALIGN + 128; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index 2b10f1bcd151..a996801d442d 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -987,20 +987,8 @@ int netxen_send_lro_cleanup(struct netxen_adapter *adapter) int netxen_nic_change_mtu(struct net_device *netdev, int mtu) { struct netxen_adapter *adapter = netdev_priv(netdev); - int max_mtu; int rc = 0; - if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) - max_mtu = P3_MAX_MTU; - else - max_mtu = P2_MAX_MTU; - - if (mtu > max_mtu) { - printk(KERN_ERR "%s: mtu > %d bytes unsupported\n", - netdev->name, max_mtu); - return -EINVAL; - } - if (adapter->set_mtu) rc = adapter->set_mtu(adapter, mtu); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 7a0281a36c28..561fb94c7267 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1572,6 +1572,13 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->physical_port = i; } + /* MTU range: 0 - 8000 (P2) or 9600 (P3) */ + netdev->min_mtu = 0; + if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) + netdev->max_mtu = P3_MAX_MTU; + else + netdev->max_mtu = P2_MAX_MTU; + netxen_nic_clear_stats(adapter); err = netxen_setup_intr(adapter); diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 653bb5735f0c..50b8a01ff512 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -154,7 +154,10 @@ struct qed_qm_iids { u32 tids; }; -enum QED_RESOURCES { +/* HW / FW resources, output of features supported below, most information + * is received from MFW. + */ +enum qed_resources { QED_SB, QED_L2_QUEUE, QED_VPORT, @@ -166,6 +169,7 @@ enum QED_RESOURCES { QED_RDMA_CNQ_RAM, QED_ILT, QED_LL2_QUEUE, + QED_CMDQS_CQS, QED_RDMA_STATS_QUEUE, QED_MAX_RESC, }; @@ -174,6 +178,7 @@ enum QED_FEATURE { QED_PF_L2_QUE, QED_VF, QED_RDMA_CNQ, + QED_VF_L2_QUE, QED_MAX_FEATURES, }; @@ -195,6 +200,11 @@ enum qed_dev_cap { QED_DEV_CAP_ROCE, }; +enum qed_wol_support { + QED_WOL_SUPPORT_NONE, + QED_WOL_SUPPORT_PME, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -226,6 +236,9 @@ struct qed_hw_info { u32 port_mode; u32 hw_mode; unsigned long device_capabilities; + u16 mtu; + + enum qed_wol_support b_wol_support; }; struct qed_hw_cid_data { @@ -538,7 +551,9 @@ struct qed_dev { u8 mcp_rev; u8 boot_mode; - u8 wol; + /* WoL related configurations */ + u8 wol_config; + u8 wol_mac[ETH_ALEN]; u32 int_mode; enum qed_coalescing_mode int_coalescing_mode; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index edae5fc5fccd..5be7b8a25425 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1057,8 +1057,10 @@ int qed_hw_init(struct qed_dev *cdev, bool allow_npar_tx_switch, const u8 *bin_fw_data) { - u32 load_code, param; - int rc, mfw_rc, i; + u32 load_code, param, drv_mb_param; + bool b_default_mtu = true; + struct qed_hwfn *p_hwfn; + int rc = 0, mfw_rc, i; if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n"); @@ -1074,6 +1076,12 @@ int qed_hw_init(struct qed_dev *cdev, for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + /* If management didn't provide a default, set one of our own */ + if (!p_hwfn->hw_info.mtu) { + p_hwfn->hw_info.mtu = 1500; + b_default_mtu = false; + } + if (IS_VF(cdev)) { p_hwfn->b_int_enabled = 1; continue; @@ -1157,6 +1165,38 @@ int qed_hw_init(struct qed_dev *cdev, p_hwfn->hw_init_done = true; } + if (IS_PF(cdev)) { + p_hwfn = QED_LEADING_HWFN(cdev); + drv_mb_param = (FW_MAJOR_VERSION << 24) | + (FW_MINOR_VERSION << 16) | + (FW_REVISION_VERSION << 8) | + (FW_ENGINEERING_VERSION); + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER, + drv_mb_param, &load_code, ¶m); + if (rc) + DP_INFO(p_hwfn, "Failed to update firmware version\n"); + + if (!b_default_mtu) { + rc = qed_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->hw_info.mtu); + if (rc) + DP_INFO(p_hwfn, + "Failed to update default mtu\n"); + } + + rc = qed_mcp_ov_update_driver_state(p_hwfn, + p_hwfn->p_main_ptt, + QED_OV_DRIVER_STATE_DISABLED); + if (rc) + DP_INFO(p_hwfn, "Failed to update driver state\n"); + + rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, + QED_OV_ESWITCH_VEB); + if (rc) + DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); + } + return 0; } @@ -1324,8 +1364,24 @@ int qed_hw_reset(struct qed_dev *cdev) { int rc = 0; u32 unload_resp, unload_param; + u32 wol_param; int i; + switch (cdev->wol_config) { + case QED_OV_WOL_DISABLED: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED; + break; + case QED_OV_WOL_ENABLED: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED; + break; + default: + DP_NOTICE(cdev, + "Unknown WoL configuration %02x\n", cdev->wol_config); + /* Fallthrough */ + case QED_OV_WOL_DEFAULT: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -1354,8 +1410,7 @@ int qed_hw_reset(struct qed_dev *cdev) /* Send unload command to MCP */ rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, - DRV_MSG_CODE_UNLOAD_REQ, - DRV_MB_PARAM_UNLOAD_WOL_MCP, + DRV_MSG_CODE_UNLOAD_REQ, wol_param, &unload_resp, &unload_param); if (rc) { DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n"); @@ -1421,6 +1476,7 @@ static void get_function_id(struct qed_hwfn *p_hwfn) static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) { u32 *feat_num = p_hwfn->hw_info.feat_num; + struct qed_sb_cnt_info sb_cnt_info; int num_features = 1; if (IS_ENABLED(CONFIG_QED_RDMA) && @@ -1439,53 +1495,257 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); - DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, - "#PF_L2_QUEUES=%d #SBS=%d num_features=%d\n", - feat_num[QED_PF_L2_QUE], RESC_NUM(p_hwfn, QED_SB), - num_features); + + memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); + feat_num[QED_VF_L2_QUE] = + min_t(u32, + RESC_NUM(p_hwfn, QED_L2_QUEUE) - + FEAT_NUM(p_hwfn, QED_PF_L2_QUE), sb_cnt_info.sb_iov_cnt); + + DP_VERBOSE(p_hwfn, + NETIF_MSG_PROBE, + "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #SBS=%d num_features=%d\n", + (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE), + (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE), + (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ), + RESC_NUM(p_hwfn, QED_SB), num_features); } -static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) +static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id) +{ + enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID; + + switch (res_id) { + case QED_SB: + mfw_res_id = RESOURCE_NUM_SB_E; + break; + case QED_L2_QUEUE: + mfw_res_id = RESOURCE_NUM_L2_QUEUE_E; + break; + case QED_VPORT: + mfw_res_id = RESOURCE_NUM_VPORT_E; + break; + case QED_RSS_ENG: + mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E; + break; + case QED_PQ: + mfw_res_id = RESOURCE_NUM_PQ_E; + break; + case QED_RL: + mfw_res_id = RESOURCE_NUM_RL_E; + break; + case QED_MAC: + case QED_VLAN: + /* Each VFC resource can accommodate both a MAC and a VLAN */ + mfw_res_id = RESOURCE_VFC_FILTER_E; + break; + case QED_ILT: + mfw_res_id = RESOURCE_ILT_E; + break; + case QED_LL2_QUEUE: + mfw_res_id = RESOURCE_LL2_QUEUE_E; + break; + case QED_RDMA_CNQ_RAM: + case QED_CMDQS_CQS: + /* CNQ/CMDQS are the same resource */ + mfw_res_id = RESOURCE_CQS_E; + break; + case QED_RDMA_STATS_QUEUE: + mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E; + break; + default: + break; + } + + return mfw_res_id; +} + +static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn, + enum qed_resources res_id) { - u8 enabled_func_idx = p_hwfn->enabled_func_idx; - u32 *resc_start = p_hwfn->hw_info.resc_start; u8 num_funcs = p_hwfn->num_funcs_on_engine; - u32 *resc_num = p_hwfn->hw_info.resc_num; struct qed_sb_cnt_info sb_cnt_info; - int i, max_vf_vlan_filters; + u32 dflt_resc_num = 0; - memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + switch (res_id) { + case QED_SB: + memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); + dflt_resc_num = sb_cnt_info.sb_cnt; + break; + case QED_L2_QUEUE: + dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs; + break; + case QED_VPORT: + dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs; + break; + case QED_RSS_ENG: + dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs; + break; + case QED_PQ: + /* The granularity of the PQs is 8 */ + dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs; + dflt_resc_num &= ~0x7; + break; + case QED_RL: + dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs; + break; + case QED_MAC: + case QED_VLAN: + /* Each VFC resource can accommodate both a MAC and a VLAN */ + dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs; + break; + case QED_ILT: + dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs; + break; + case QED_LL2_QUEUE: + dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs; + break; + case QED_RDMA_CNQ_RAM: + case QED_CMDQS_CQS: + /* CNQ/CMDQS are the same resource */ + dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs; + break; + case QED_RDMA_STATS_QUEUE: + dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs; + break; + default: + break; + } -#ifdef CONFIG_QED_SRIOV - max_vf_vlan_filters = QED_ETH_MAX_VF_NUM_VLAN_FILTERS; -#else - max_vf_vlan_filters = 0; -#endif + return dflt_resc_num; +} + +static const char *qed_hw_get_resc_name(enum qed_resources res_id) +{ + switch (res_id) { + case QED_SB: + return "SB"; + case QED_L2_QUEUE: + return "L2_QUEUE"; + case QED_VPORT: + return "VPORT"; + case QED_RSS_ENG: + return "RSS_ENG"; + case QED_PQ: + return "PQ"; + case QED_RL: + return "RL"; + case QED_MAC: + return "MAC"; + case QED_VLAN: + return "VLAN"; + case QED_RDMA_CNQ_RAM: + return "RDMA_CNQ_RAM"; + case QED_ILT: + return "ILT"; + case QED_LL2_QUEUE: + return "LL2_QUEUE"; + case QED_CMDQS_CQS: + return "CMDQS_CQS"; + case QED_RDMA_STATS_QUEUE: + return "RDMA_STATS_QUEUE"; + default: + return "UNKNOWN_RESOURCE"; + } +} - qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); +static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn, + enum qed_resources res_id) +{ + u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param; + u32 *p_resc_num, *p_resc_start; + struct resource_info resc_info; + int rc; + + p_resc_num = &RESC_NUM(p_hwfn, res_id); + p_resc_start = &RESC_START(p_hwfn, res_id); + + /* Default values assumes that each function received equal share */ + dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id); + if (!dflt_resc_num) { + DP_ERR(p_hwfn, + "Failed to get default amount for resource %d [%s]\n", + res_id, qed_hw_get_resc_name(res_id)); + return -EINVAL; + } + dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx; + + memset(&resc_info, 0, sizeof(resc_info)); + resc_info.res_id = qed_hw_get_mfw_res_id(res_id); + if (resc_info.res_id == RESOURCE_NUM_INVALID) { + DP_ERR(p_hwfn, + "Failed to match resource %d [%s] with the MFW resources\n", + res_id, qed_hw_get_resc_name(res_id)); + return -EINVAL; + } + + rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info, + &mcp_resp, &mcp_param); + if (rc) { + DP_NOTICE(p_hwfn, + "MFW response failure for an allocation request for resource %d [%s]\n", + res_id, qed_hw_get_resc_name(res_id)); + return rc; + } - resc_num[QED_SB] = min_t(u32, - (MAX_SB_PER_PATH_BB / num_funcs), - sb_cnt_info.sb_cnt); - resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs; - resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs; - resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs; - resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs; - resc_num[QED_RL] = min_t(u32, 64, resc_num[QED_VPORT]); - resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs; - resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / - num_funcs; - resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; - resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; - resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs; - resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / - num_funcs; - - for (i = 0; i < QED_MAX_RESC; i++) - resc_start[i] = resc_num[i] * enabled_func_idx; + /* Default driver values are applied in the following cases: + * - The resource allocation MB command is not supported by the MFW + * - There is an internal error in the MFW while processing the request + * - The resource ID is unknown to the MFW + */ + if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK && + mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) { + DP_NOTICE(p_hwfn, + "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. Applying default values [num %d, start %d].\n", + res_id, + qed_hw_get_resc_name(res_id), + mcp_resp, dflt_resc_num, dflt_resc_start); + *p_resc_num = dflt_resc_num; + *p_resc_start = dflt_resc_start; + goto out; + } + + /* Special handling for status blocks; Would be revised in future */ + if (res_id == QED_SB) { + resc_info.size -= 1; + resc_info.offset -= p_hwfn->enabled_func_idx; + } + + *p_resc_num = resc_info.size; + *p_resc_start = resc_info.offset; + +out: + /* PQs have to divide by 8 [that's the HW granularity]. + * Reduce number so it would fit. + */ + if ((res_id == QED_PQ) && ((*p_resc_num % 8) || (*p_resc_start % 8))) { + DP_INFO(p_hwfn, + "PQs need to align by 8; Number %08x --> %08x, Start %08x --> %08x\n", + *p_resc_num, + (*p_resc_num) & ~0x7, + *p_resc_start, (*p_resc_start) & ~0x7); + *p_resc_num &= ~0x7; + *p_resc_start &= ~0x7; + } + + return 0; +} + +static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) +{ + u8 res_id; + int rc; + + for (res_id = 0; res_id < QED_MAX_RESC; res_id++) { + rc = qed_hw_set_resc_info(p_hwfn, res_id); + if (rc) + return rc; + } /* Sanity for ILT */ - if (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB) { + if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) { DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n", RESC_START(p_hwfn, QED_ILT), RESC_END(p_hwfn, QED_ILT) - 1); @@ -1495,34 +1755,12 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) qed_hw_set_feat(p_hwfn); DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, - "The numbers for each resource are:\n" - "SB = %d start = %d\n" - "L2_QUEUE = %d start = %d\n" - "VPORT = %d start = %d\n" - "PQ = %d start = %d\n" - "RL = %d start = %d\n" - "MAC = %d start = %d\n" - "VLAN = %d start = %d\n" - "ILT = %d start = %d\n" - "LL2_QUEUE = %d start = %d\n", - p_hwfn->hw_info.resc_num[QED_SB], - p_hwfn->hw_info.resc_start[QED_SB], - p_hwfn->hw_info.resc_num[QED_L2_QUEUE], - p_hwfn->hw_info.resc_start[QED_L2_QUEUE], - p_hwfn->hw_info.resc_num[QED_VPORT], - p_hwfn->hw_info.resc_start[QED_VPORT], - p_hwfn->hw_info.resc_num[QED_PQ], - p_hwfn->hw_info.resc_start[QED_PQ], - p_hwfn->hw_info.resc_num[QED_RL], - p_hwfn->hw_info.resc_start[QED_RL], - p_hwfn->hw_info.resc_num[QED_MAC], - p_hwfn->hw_info.resc_start[QED_MAC], - p_hwfn->hw_info.resc_num[QED_VLAN], - p_hwfn->hw_info.resc_start[QED_VLAN], - p_hwfn->hw_info.resc_num[QED_ILT], - p_hwfn->hw_info.resc_start[QED_ILT], - RESC_NUM(p_hwfn, QED_LL2_QUEUE), - RESC_START(p_hwfn, QED_LL2_QUEUE)); + "The numbers for each resource are:\n"); + for (res_id = 0; res_id < QED_MAX_RESC; res_id++) + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n", + qed_hw_get_resc_name(res_id), + RESC_NUM(p_hwfn, res_id), + RESC_START(p_hwfn, res_id)); return 0; } @@ -1801,6 +2039,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, qed_get_num_funcs(p_hwfn, p_ptt); + if (qed_mcp_is_init(p_hwfn)) + p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu; + return qed_hw_get_resc(p_hwfn); } @@ -1975,8 +2216,13 @@ int qed_hw_prepare(struct qed_dev *cdev, void qed_hw_remove(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); int i; + if (IS_PF(cdev)) + qed_mcp_ov_update_driver_state(p_hwfn, p_hwfn->p_main_ptt, + QED_OV_DRIVER_STATE_NOT_LOADED); + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 2777d5bb4380..785ab03683eb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8526,6 +8526,41 @@ struct mdump_config_stc { u32 valid_logs; }; +enum resource_id_enum { + RESOURCE_NUM_SB_E = 0, + RESOURCE_NUM_L2_QUEUE_E = 1, + RESOURCE_NUM_VPORT_E = 2, + RESOURCE_NUM_VMQ_E = 3, + RESOURCE_FACTOR_NUM_RSS_PF_E = 4, + RESOURCE_FACTOR_RSS_PER_VF_E = 5, + RESOURCE_NUM_RL_E = 6, + RESOURCE_NUM_PQ_E = 7, + RESOURCE_NUM_VF_E = 8, + RESOURCE_VFC_FILTER_E = 9, + RESOURCE_ILT_E = 10, + RESOURCE_CQS_E = 11, + RESOURCE_GFT_PROFILES_E = 12, + RESOURCE_NUM_TC_E = 13, + RESOURCE_NUM_RSS_ENGINES_E = 14, + RESOURCE_LL2_QUEUE_E = 15, + RESOURCE_RDMA_STATS_QUEUE_E = 16, + RESOURCE_MAX_NUM, + RESOURCE_NUM_INVALID = 0xFFFFFFFF +}; + +/* Resource ID is to be filled by the driver in the MB request + * Size, offset & flags to be filled by the MFW in the MB response + */ +struct resource_info { + enum resource_id_enum res_id; + u32 size; /* number of allocated resources */ + u32 offset; /* Offset of the 1st resource */ + u32 vf_size; + u32 vf_offset; + u32 flags; +#define RESOURCE_ELEMENT_STRICT (1 << 0) +}; + union drv_union_data { u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD]; struct mcp_mac wol_mac; @@ -8543,9 +8578,9 @@ union drv_union_data { struct drv_version_stc drv_version; struct lan_stats_stc lan_stats; - u64 reserved_stats[11]; struct ocbb_data_stc ocbb_info; struct temperature_status_stc temp_info; + struct resource_info resource; struct bist_nvm_image_att nvm_image_att; struct mdump_config_stc mdump_config; }; @@ -8561,9 +8596,19 @@ struct public_drv_mb { #define DRV_MSG_CODE_INIT_PHY 0x22000000 #define DRV_MSG_CODE_LINK_RESET 0x23000000 #define DRV_MSG_CODE_SET_DCBX 0x25000000 +#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG 0x26000000 +#define DRV_MSG_CODE_OV_UPDATE_BUS_NUM 0x27000000 +#define DRV_MSG_CODE_OV_UPDATE_BOOT_PROGRESS 0x28000000 +#define DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER 0x29000000 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE 0x31000000 +#define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 +#define DRV_MSG_CODE_OV_UPDATE_MTU 0x33000000 +#define DRV_MSG_CODE_OV_UPDATE_WOL 0x38000000 +#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE 0x39000000 #define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 +#define DRV_MSG_GET_RESOURCE_ALLOC_MSG 0x34000000 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 #define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 @@ -8571,6 +8616,13 @@ struct public_drv_mb { #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 #define DRV_MSG_CODE_MCP_HALT 0x00100000 +#define DRV_MSG_CODE_SET_VMAC 0x00110000 +#define DRV_MSG_CODE_GET_VMAC 0x00120000 +#define DRV_MSG_CODE_VMAC_TYPE_SHIFT 4 +#define DRV_MSG_CODE_VMAC_TYPE_MASK 0x30 +#define DRV_MSG_CODE_VMAC_TYPE_MAC 1 +#define DRV_MSG_CODE_VMAC_TYPE_WWNN 2 +#define DRV_MSG_CODE_VMAC_TYPE_WWPN 3 #define DRV_MSG_CODE_GET_STATS 0x00130000 #define DRV_MSG_CODE_STATS_TYPE_LAN 1 @@ -8582,11 +8634,16 @@ struct public_drv_mb { #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 +#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000 +#define DRV_MSG_CODE_OS_WOL 0x002e0000 #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; -#define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 +#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN 0x00000000 +#define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 +#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED 0x00000002 +#define DRV_MB_PARAM_UNLOAD_WOL_ENABLED 0x00000003 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x000000FF #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 @@ -8599,13 +8656,59 @@ struct public_drv_mb { #define DRV_MB_PARAM_LLDP_SEND_MASK 0x00000001 #define DRV_MB_PARAM_LLDP_SEND_SHIFT 0 +#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT 0 +#define DRV_MB_PARAM_OV_CURR_CFG_MASK 0x0000000F +#define DRV_MB_PARAM_OV_CURR_CFG_NONE 0 +#define DRV_MB_PARAM_OV_CURR_CFG_OS 1 +#define DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC 2 +#define DRV_MB_PARAM_OV_CURR_CFG_OTHER 3 + +#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT 0 +#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK 0xFFFFFFFF +#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK 0xFF000000 +#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK 0x00FF0000 +#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK 0x0000FF00 +#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK 0x000000FF + +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT 0 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK 0xF +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN 0x1 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED 0x2 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_LOADING 0x3 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED 0x4 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE 0x5 + +#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT 0 +#define DRV_MB_PARAM_OV_MTU_SIZE_MASK 0xFFFFFFFF + +#define DRV_MB_PARAM_WOL_MASK (DRV_MB_PARAM_WOL_DEFAULT | \ + DRV_MB_PARAM_WOL_DISABLED | \ + DRV_MB_PARAM_WOL_ENABLED) +#define DRV_MB_PARAM_WOL_DEFAULT DRV_MB_PARAM_UNLOAD_WOL_MCP +#define DRV_MB_PARAM_WOL_DISABLED DRV_MB_PARAM_UNLOAD_WOL_DISABLED +#define DRV_MB_PARAM_WOL_ENABLED DRV_MB_PARAM_UNLOAD_WOL_ENABLED + +#define DRV_MB_PARAM_ESWITCH_MODE_MASK (DRV_MB_PARAM_ESWITCH_MODE_NONE | \ + DRV_MB_PARAM_ESWITCH_MODE_VEB | \ + DRV_MB_PARAM_ESWITCH_MODE_VEPA) +#define DRV_MB_PARAM_ESWITCH_MODE_NONE 0x0 +#define DRV_MB_PARAM_ESWITCH_MODE_VEB 0x1 +#define DRV_MB_PARAM_ESWITCH_MODE_VEPA 0x2 #define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 #define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 #define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + /* Resource Allocation params - Driver version support */ +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000 +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16 +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK 0x0000FFFF +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT 0 + #define DRV_MB_PARAM_BIST_REGISTER_TEST 1 #define DRV_MB_PARAM_BIST_CLOCK_TEST 2 +#define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES 3 +#define DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX 4 #define DRV_MB_PARAM_BIST_RC_UNKNOWN 0 #define DRV_MB_PARAM_BIST_RC_PASSED 1 @@ -8614,6 +8717,8 @@ struct public_drv_mb { #define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT 0 #define DRV_MB_PARAM_BIST_TEST_INDEX_MASK 0x000000FF +#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT 8 +#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK 0x0000FF00 u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 @@ -8628,15 +8733,27 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_UNLOAD_PORT 0x20120000 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 #define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 +#define FW_MSG_CODE_RESOURCE_ALLOC_OK 0x34000000 +#define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN 0x35000000 +#define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED 0x36000000 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 #define FW_MSG_CODE_NVM_OK 0x00010000 #define FW_MSG_CODE_OK 0x00160000 +#define FW_MSG_CODE_OS_WOL_SUPPORTED 0x00800000 +#define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED 0x00810000 + #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 fw_mb_param; + /* get pf rdma protocol command responce */ +#define FW_MB_PARAM_GET_PF_RDMA_NONE 0x0 +#define FW_MB_PARAM_GET_PF_RDMA_ROCE 0x1 +#define FW_MB_PARAM_GET_PF_RDMA_IWARP 0x2 +#define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3 + u32 drv_pulse_mb; #define DRV_PULSE_SEQ_MASK 0x00007fff #define DRV_PULSE_SYSTEM_TIME_MASK 0xffff0000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 2adedc6fb6cf..bb74e1c10ffe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -3030,6 +3030,31 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) } } } + + /* There's a possibility the igu_sb_cnt_iov doesn't properly reflect + * the number of VF SBs [especially for first VF on engine, as we can't + * diffrentiate between empty entries and its entries]. + * Since we don't really support more SBs than VFs today, prevent any + * such configuration by sanitizing the number of SBs to equal the + * number of VFs. + */ + if (IS_PF_SRIOV(p_hwfn)) { + u16 total_vfs = p_hwfn->cdev->p_iov_info->total_vfs; + + if (total_vfs < p_igu_info->free_blks) { + DP_VERBOSE(p_hwfn, + (NETIF_MSG_INTR | QED_MSG_IOV), + "Limiting number of SBs for IOV - %04x --> %04x\n", + p_igu_info->free_blks, + p_hwfn->cdev->p_iov_info->total_vfs); + p_igu_info->free_blks = total_vfs; + } else if (total_vfs > p_igu_info->free_blks) { + DP_NOTICE(p_hwfn, + "IGU has only %04x SBs for VFs while the device has %04x VFs\n", + p_igu_info->free_blks, total_vfs); + return -EINVAL; + } + } p_igu_info->igu_sb_cnt_iov = p_igu_info->free_blks; DP_VERBOSE( @@ -3163,7 +3188,12 @@ u16 qed_int_queue_id_from_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) return sb_id - p_info->igu_base_sb; } else if ((sb_id >= p_info->igu_base_sb_iov) && (sb_id < p_info->igu_base_sb_iov + p_info->igu_sb_cnt_iov)) { - return sb_id - p_info->igu_base_sb_iov + p_info->igu_sb_cnt; + /* We want the first VF queue to be adjacent to the + * last PF queue. Since L2 queues can be partial to + * SBs, we'll use the feature instead. + */ + return sb_id - p_info->igu_base_sb_iov + + FEAT_NUM(p_hwfn, QED_PF_L2_QUE); } else { DP_NOTICE(p_hwfn, "SB %d not in range for function\n", sb_id); return 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index ddd410a91e13..900b253be317 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1652,6 +1652,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, if (IS_PF(cdev)) { int max_vf_vlan_filters = 0; + int max_vf_mac_filters = 0; if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { for_each_hwfn(cdev, i) @@ -1665,11 +1666,18 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, info->num_queues = cdev->num_hwfns; } - if (IS_QED_SRIOV(cdev)) + if (IS_QED_SRIOV(cdev)) { max_vf_vlan_filters = cdev->p_iov_info->total_vfs * QED_ETH_VF_NUM_VLAN_FILTERS; - info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN) - + max_vf_mac_filters = cdev->p_iov_info->total_vfs * + QED_ETH_VF_NUM_MAC_FILTERS; + } + info->num_vlan_filters = RESC_NUM(QED_LEADING_HWFN(cdev), + QED_VLAN) - max_vf_vlan_filters; + info->num_mac_filters = RESC_NUM(QED_LEADING_HWFN(cdev), + QED_MAC) - + max_vf_mac_filters; ether_addr_copy(info->port_mac, cdev->hwfns[0].hw_info.hw_mac_addr); @@ -1683,7 +1691,9 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, } qed_vf_get_num_vlan_filters(&cdev->hwfns[0], - &info->num_vlan_filters); + (u8 *)&info->num_vlan_filters); + qed_vf_get_num_mac_filters(&cdev->hwfns[0], + (u8 *)&info->num_mac_filters); qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac); info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 333c7442e48a..aeb98d8c5626 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -221,6 +221,10 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->fw_eng = FW_ENGINEERING_VERSION; dev_info->mf_mode = cdev->mf_mode; dev_info->tx_switching = true; + + if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support == + QED_WOL_SUPPORT_PME) + dev_info->wol_support = true; } else { qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major, &dev_info->fw_minor, &dev_info->fw_rev, @@ -243,6 +247,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, &dev_info->mfw_rev, NULL); } + dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu; + return 0; } @@ -1430,11 +1436,106 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +static int qed_update_wol(struct qed_dev *cdev, bool enabled) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int rc = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + rc = qed_mcp_ov_update_wol(hwfn, ptt, enabled ? QED_OV_WOL_ENABLED + : QED_OV_WOL_DISABLED); + if (rc) + goto out; + rc = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return rc; +} + +static int qed_update_drv_state(struct qed_dev *cdev, bool active) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_driver_state(hwfn, ptt, active ? + QED_OV_DRIVER_STATE_ACTIVE : + QED_OV_DRIVER_STATE_DISABLED); + + qed_ptt_release(hwfn, ptt); + + return status; +} + +static int qed_update_mac(struct qed_dev *cdev, u8 *mac) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_mac(hwfn, ptt, mac); + if (status) + goto out; + + status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return status; +} + +static int qed_update_mtu(struct qed_dev *cdev, u16 mtu) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_mtu(hwfn, ptt, mtu); + if (status) + goto out; + + status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return status; +} + static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, .selftest_register = &qed_selftest_register, .selftest_clock = &qed_selftest_clock, + .selftest_nvram = &qed_selftest_nvram, }; const struct qed_common_ops qed_common_ops_pass = { @@ -1464,6 +1565,10 @@ const struct qed_common_ops qed_common_ops_pass = { .get_coalesce = &qed_get_coalesce, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, + .update_drv_state = &qed_update_drv_state, + .update_mac = &qed_update_mac, + .update_mtu = &qed_update_mtu, + .update_wol = &qed_update_wol, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bdc9ba92f6d4..6dd3ce443484 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/etherdevice.h> #include "qed.h" #include "qed_dcbx.h" #include "qed_hsi.h" @@ -329,6 +330,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_mcp_mb_params *p_mb_params) { u32 union_data_addr; + int rc; /* MCP not initialized */ @@ -374,11 +376,32 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, u32 *o_mcp_param) { struct qed_mcp_mb_params mb_params; + union drv_union_data data_src; int rc; memset(&mb_params, 0, sizeof(mb_params)); + memset(&data_src, 0, sizeof(data_src)); mb_params.cmd = cmd; mb_params.param = param; + + /* In case of UNLOAD_DONE, set the primary MAC */ + if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) && + (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) { + u8 *p_mac = p_hwfn->cdev->wol_mac; + + data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1]; + data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 | + p_mac[4] << 8 | p_mac[5]; + + DP_VERBOSE(p_hwfn, + (QED_MSG_SP | NETIF_MSG_IFDOWN), + "Setting WoL MAC: %pM --> [%08x,%08x]\n", + p_mac, data_src.wol_mac.mac_upper, + data_src.wol_mac.mac_lower); + + mb_params.p_data_src = &data_src; + } + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); if (rc) return rc; @@ -1001,28 +1024,89 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return 0; } +/* Old MFW has a global configuration for all PFs regarding RDMA support */ +static void +qed_mcp_get_shmem_proto_legacy(struct qed_hwfn *p_hwfn, + enum qed_pci_personality *p_proto) +{ + /* There wasn't ever a legacy MFW that published iwarp. + * So at this point, this is either plain l2 or RoCE. + */ + if (test_bit(QED_DEV_CAP_ROCE, &p_hwfn->hw_info.device_capabilities)) + *p_proto = QED_PCI_ETH_ROCE; + else + *p_proto = QED_PCI_ETH; + + DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, + "According to Legacy capabilities, L2 personality is %08x\n", + (u32) *p_proto); +} + +static int +qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_pci_personality *p_proto) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL, 0, &resp, ¶m); + if (rc) + return rc; + if (resp != FW_MSG_CODE_OK) { + DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, + "MFW lacks support for command; Returns %08x\n", + resp); + return -EINVAL; + } + + switch (param) { + case FW_MB_PARAM_GET_PF_RDMA_NONE: + *p_proto = QED_PCI_ETH; + break; + case FW_MB_PARAM_GET_PF_RDMA_ROCE: + *p_proto = QED_PCI_ETH_ROCE; + break; + case FW_MB_PARAM_GET_PF_RDMA_BOTH: + DP_NOTICE(p_hwfn, + "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n"); + *p_proto = QED_PCI_ETH_ROCE; + break; + case FW_MB_PARAM_GET_PF_RDMA_IWARP: + default: + DP_NOTICE(p_hwfn, + "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n", + param); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, + NETIF_MSG_IFUP, + "According to capabilities, L2 personality is %08x [resp %08x param %08x]\n", + (u32) *p_proto, resp, param); + return 0; +} + static int qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn, struct public_func *p_info, + struct qed_ptt *p_ptt, enum qed_pci_personality *p_proto) { int rc = 0; switch (p_info->config & FUNC_MF_CFG_PROTOCOL_MASK) { case FUNC_MF_CFG_PROTOCOL_ETHERNET: - if (test_bit(QED_DEV_CAP_ROCE, - &p_hwfn->hw_info.device_capabilities)) - *p_proto = QED_PCI_ETH_ROCE; - else - *p_proto = QED_PCI_ETH; + if (qed_mcp_get_shmem_proto_mfw(p_hwfn, p_ptt, p_proto)) + qed_mcp_get_shmem_proto_legacy(p_hwfn, p_proto); break; case FUNC_MF_CFG_PROTOCOL_ISCSI: *p_proto = QED_PCI_ISCSI; break; case FUNC_MF_CFG_PROTOCOL_ROCE: DP_NOTICE(p_hwfn, "RoCE personality is not a valid value!\n"); - rc = -EINVAL; - break; + /* Fallthrough */ default: rc = -EINVAL; } @@ -1042,7 +1126,8 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->pause_on_host = (shmem_info.config & FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0; - if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) { + if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, p_ptt, + &info->protocol)) { DP_ERR(p_hwfn, "Unknown personality %08x\n", (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK)); return -EINVAL; @@ -1057,6 +1142,9 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->mac[3] = (u8)(shmem_info.mac_lower >> 16); info->mac[4] = (u8)(shmem_info.mac_lower >> 8); info->mac[5] = (u8)(shmem_info.mac_lower); + + /* Store primary MAC for later possible WoL */ + memcpy(&p_hwfn->cdev->wol_mac, info->mac, ETH_ALEN); } else { DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n"); } @@ -1068,13 +1156,30 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK); + info->mtu = (u16)shmem_info.mtu_size; + + p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_NONE; + p_hwfn->cdev->wol_config = (u8)QED_OV_WOL_DEFAULT; + if (qed_mcp_is_init(p_hwfn)) { + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_OS_WOL, 0, &resp, ¶m); + if (rc) + return rc; + if (resp == FW_MSG_CODE_OS_WOL_SUPPORTED) + p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_PME; + } + DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP), - "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n", + "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x wol %02x\n", info->pause_on_host, info->protocol, info->bandwidth_min, info->bandwidth_max, info->mac[0], info->mac[1], info->mac[2], info->mac[3], info->mac[4], info->mac[5], - info->wwn_port, info->wwn_node, info->ovlan); + info->wwn_port, info->wwn_node, + info->ovlan, (u8)p_hwfn->hw_info.b_wol_support); return 0; } @@ -1223,6 +1328,178 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; } +int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_client client) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (client) { + case QED_OV_CLIENT_DRV: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OS; + break; + case QED_OV_CLIENT_USER: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OTHER; + break; + case QED_OV_CLIENT_VENDOR_SPEC: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC; + break; + default: + DP_NOTICE(p_hwfn, "Invalid client type %d\n", client); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_CURR_CFG, + drv_mb_param, &resp, ¶m); + if (rc) + DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + + return rc; +} + +int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_driver_state drv_state) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (drv_state) { + case QED_OV_DRIVER_STATE_NOT_LOADED: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED; + break; + case QED_OV_DRIVER_STATE_DISABLED: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED; + break; + case QED_OV_DRIVER_STATE_ACTIVE: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE; + break; + default: + DP_NOTICE(p_hwfn, "Invalid driver state %d\n", drv_state); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE, + drv_mb_param, &resp, ¶m); + if (rc) + DP_ERR(p_hwfn, "Failed to send driver state\n"); + + return rc; +} + +int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 mtu) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + drv_mb_param = (u32)mtu << DRV_MB_PARAM_OV_MTU_SIZE_SHIFT; + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU, + drv_mb_param, &resp, ¶m); + if (rc) + DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", rc); + + return rc; +} + +int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *mac) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_SET_VMAC; + mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC << + DRV_MSG_CODE_VMAC_TYPE_SHIFT; + mb_params.param |= MCP_PF_ID(p_hwfn); + ether_addr_copy(&union_data.raw_data[0], mac); + mb_params.p_data_src = &union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc); + + /* Store primary MAC for later possible WoL */ + memcpy(p_hwfn->cdev->wol_mac, mac, ETH_ALEN); + + return rc; +} + +int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, enum qed_ov_wol wol) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + if (p_hwfn->hw_info.b_wol_support == QED_WOL_SUPPORT_NONE) { + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Can't change WoL configuration when WoL isn't supported\n"); + return -EINVAL; + } + + switch (wol) { + case QED_OV_WOL_DEFAULT: + drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT; + break; + case QED_OV_WOL_DISABLED: + drv_mb_param = DRV_MB_PARAM_WOL_DISABLED; + break; + case QED_OV_WOL_ENABLED: + drv_mb_param = DRV_MB_PARAM_WOL_ENABLED; + break; + default: + DP_ERR(p_hwfn, "Invalid wol state %d\n", wol); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_WOL, + drv_mb_param, &resp, ¶m); + if (rc) + DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc); + + /* Store the WoL update for a future unload */ + p_hwfn->cdev->wol_config = (u8)wol; + + return rc; +} + +int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_eswitch eswitch) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (eswitch) { + case QED_OV_ESWITCH_NONE: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_NONE; + break; + case QED_OV_ESWITCH_VEB: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEB; + break; + case QED_OV_ESWITCH_VEPA: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA; + break; + default: + DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE, + drv_mb_param, &resp, ¶m); + if (rc) + DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc); + + return rc; +} + int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_led_mode mode) { @@ -1271,6 +1548,52 @@ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, return rc; } +int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len) +{ + u32 bytes_left = len, offset = 0, bytes_to_copy, read_len = 0; + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + u32 resp = 0, resp_param = 0; + struct qed_ptt *p_ptt; + int rc = 0; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + + while (bytes_left > 0) { + bytes_to_copy = min_t(u32, bytes_left, MCP_DRV_NVM_BUF_LEN); + + rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_NVM_READ_NVRAM, + addr + offset + + (bytes_to_copy << + DRV_MB_PARAM_NVM_LEN_SHIFT), + &resp, &resp_param, + &read_len, + (u32 *)(p_buf + offset)); + + if (rc || (resp != FW_MSG_CODE_NVM_OK)) { + DP_NOTICE(cdev, "MCP command rc = %d\n", rc); + break; + } + + /* This can be a lengthy process, and it's possible scheduler + * isn't preemptable. Sleep a bit to prevent CPU hogging. + */ + if (bytes_left % 0x1000 < + (bytes_left - read_len) % 0x1000) + usleep_range(1000, 2000); + + offset += read_len; + bytes_left -= read_len; + } + + cdev->mcp_nvm_resp = resp; + qed_ptt_release(p_hwfn, p_ptt); + + return rc; +} + int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 drv_mb_param = 0, rsp, param; @@ -1312,3 +1635,101 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return rc; } + +int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *num_images) +{ + u32 drv_mb_param = 0, rsp; + int rc = 0; + + drv_mb_param = (DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT); + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST, + drv_mb_param, &rsp, num_images); + if (rc) + return rc; + + if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)) + rc = -EINVAL; + + return rc; +} + +int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct bist_nvm_image_att *p_image_att, + u32 image_index) +{ + u32 buf_size = 0, param, resp = 0, resp_param = 0; + int rc; + + param = DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT; + param |= image_index << DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT; + + rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_BIST_TEST, param, + &resp, &resp_param, + &buf_size, + (u32 *)p_image_att); + if (rc) + return rc; + + if (((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) || + (p_image_att->return_code != 1)) + rc = -EINVAL; + + return rc; +} + +#define QED_RESC_ALLOC_VERSION_MAJOR 1 +#define QED_RESC_ALLOC_VERSION_MINOR 0 +#define QED_RESC_ALLOC_VERSION \ + ((QED_RESC_ALLOC_VERSION_MAJOR << \ + DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \ + (QED_RESC_ALLOC_VERSION_MINOR << \ + DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT)) +int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct resource_info *p_resc_info, + u32 *p_mcp_resp, u32 *p_mcp_param) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + memset(&union_data, 0, sizeof(union_data)); + mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG; + mb_params.param = QED_RESC_ALLOC_VERSION; + + /* Need to have a sufficient large struct, as the cmd_and_union + * is going to do memcpy from and to it. + */ + memcpy(&union_data.resource, p_resc_info, sizeof(*p_resc_info)); + + mb_params.p_data_src = &union_data; + mb_params.p_data_dst = &union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + /* Copy the data back */ + memcpy(p_resc_info, &union_data.resource, sizeof(*p_resc_info)); + *p_mcp_resp = mb_params.mcp_resp; + *p_mcp_param = mb_params.mcp_param; + + DP_VERBOSE(p_hwfn, + QED_MSG_SP, + "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n", + *p_mcp_param, + p_resc_info->res_id, + p_resc_info->size, + p_resc_info->offset, + p_resc_info->vf_size, + p_resc_info->vf_offset, p_resc_info->flags); + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dff520ed069b..407a2c1830fb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -92,6 +92,8 @@ struct qed_mcp_function_info { #define QED_MCP_VLAN_UNSET (0xffff) u16 ovlan; + + u16 mtu; }; struct qed_mcp_nvm_common { @@ -147,6 +149,30 @@ union qed_mcp_protocol_stats { struct qed_mcp_rdma_stats rdma_stats; }; +enum qed_ov_eswitch { + QED_OV_ESWITCH_NONE, + QED_OV_ESWITCH_VEB, + QED_OV_ESWITCH_VEPA +}; + +enum qed_ov_client { + QED_OV_CLIENT_DRV, + QED_OV_CLIENT_USER, + QED_OV_CLIENT_VENDOR_SPEC +}; + +enum qed_ov_driver_state { + QED_OV_DRIVER_STATE_NOT_LOADED, + QED_OV_DRIVER_STATE_DISABLED, + QED_OV_DRIVER_STATE_ACTIVE +}; + +enum qed_ov_wol { + QED_OV_WOL_DEFAULT, + QED_OV_WOL_DISABLED, + QED_OV_WOL_ENABLED +}; + /** * @brief - returns the link params of the hw function * @@ -278,6 +304,69 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_mcp_drv_version *p_ver); /** + * @brief Notify MFW about the change in base device properties + * + * @param p_hwfn + * @param p_ptt + * @param client - qed client type + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_client client); + +/** + * @brief Notify MFW about the driver state + * + * @param p_hwfn + * @param p_ptt + * @param drv_state - Driver state + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_driver_state drv_state); + +/** + * @brief Send MTU size to MFW + * + * @param p_hwfn + * @param p_ptt + * @param mtu - MTU size + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 mtu); + +/** + * @brief Send MAC address to MFW + * + * @param p_hwfn + * @param p_ptt + * @param mac - MAC address + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *mac); + +/** + * @brief Send WOL mode to MFW + * + * @param p_hwfn + * @param p_ptt + * @param wol - WOL mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_wol wol); + +/** * @brief Set LED status * * @param p_hwfn @@ -291,6 +380,18 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, enum qed_led_mode mode); /** + * @brief Read from nvm + * + * @param cdev + * @param addr - nvm offset + * @param p_buf - nvm read buffer + * @param len - buffer len + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len); + +/** * @brief Bist register test * * @param p_hwfn - hw function @@ -312,6 +413,35 @@ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief Bist nvm test - get number of images + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * @param num_images - number of images if operation was + * successful. 0 if not. + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *num_images); + +/** + * @brief Bist nvm test - get image attributes by index + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * @param p_image_att - Attributes of image + * @param image_index - Index of image to get information for + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct bist_nvm_image_att *p_image_att, + u32 image_index); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields @@ -546,4 +676,32 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 mask_parities); +/** + * @brief Send eswitch mode to MFW + * + * @param p_hwfn + * @param p_ptt + * @param eswitch - eswitch mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_eswitch eswitch); + +/** + * @brief - Gets the MFW allocation info for the given resource + * + * @param p_hwfn + * @param p_ptt + * @param p_resc_info - descriptor of requested resource + * @param p_mcp_resp + * @param p_mcp_param + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct resource_info *p_resc_info, + u32 *p_mcp_resp, u32 *p_mcp_param); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f3a825a8f8d5..6a353ffb87a4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -2658,7 +2658,6 @@ static int qed_roce_ll2_start(struct qed_dev *cdev, DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n"); return -ENOMEM; } - memset(roce_ll2, 0, sizeof(*roce_ll2)); roce_ll2->handle = QED_LL2_UNUSED_HANDLE; roce_ll2->cbs = params->cbs; roce_ll2->cb_cookie = params->cb_cookie; diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c index 9b7678f26909..48bfaecaf6dc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -1,3 +1,4 @@ +#include <linux/crc32.h> #include "qed.h" #include "qed_dev_api.h" #include "qed_mcp.h" @@ -75,3 +76,103 @@ int qed_selftest_clock(struct qed_dev *cdev) return rc; } + +int qed_selftest_nvram(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + u32 num_images, i, j, nvm_crc, calc_crc; + struct bist_nvm_image_att image_att; + u8 *buf = NULL; + __be32 val; + int rc; + + if (!p_ptt) { + DP_ERR(p_hwfn, "failed to acquire ptt\n"); + return -EBUSY; + } + + /* Acquire from MFW the amount of available images */ + rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images); + if (rc || !num_images) { + DP_ERR(p_hwfn, "Failed getting number of images\n"); + return -EINVAL; + } + + /* Iterate over images and validate CRC */ + for (i = 0; i < num_images; i++) { + /* This mailbox returns information about the image required for + * reading it. + */ + rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt, + &image_att, i); + if (rc) { + DP_ERR(p_hwfn, + "Failed getting image index %d attributes\n", + i); + goto err0; + } + + /* After MFW crash dump is collected - the image's CRC stops + * being valid. + */ + if (image_att.image_type == NVM_TYPE_MDUMP) + continue; + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n", + i, image_att.len); + + /* Allocate a buffer for holding the nvram image */ + buf = kzalloc(image_att.len, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + goto err0; + } + + /* Read image into buffer */ + rc = qed_mcp_nvm_read(p_hwfn->cdev, image_att.nvm_start_addr, + buf, image_att.len); + if (rc) { + DP_ERR(p_hwfn, + "Failed reading image index %d from nvm.\n", i); + goto err1; + } + + /* Convert the buffer into big-endian format (excluding the + * closing 4 bytes of CRC). + */ + for (j = 0; j < image_att.len - 4; j += 4) { + val = cpu_to_be32(*(u32 *)&buf[j]); + *(u32 *)&buf[j] = (__force u32)val; + } + + /* Calc CRC for the "actual" image buffer, i.e. not including + * the last 4 CRC bytes. + */ + nvm_crc = *(u32 *)(buf + image_att.len - 4); + calc_crc = crc32(0xffffffff, buf, image_att.len - 4); + calc_crc = (__force u32)~cpu_to_be32(calc_crc); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "nvm crc 0x%x, calc_crc 0x%x\n", nvm_crc, calc_crc); + + if (calc_crc != nvm_crc) { + rc = -EINVAL; + goto err1; + } + + /* Done with this image; Free to prevent double release + * on subsequent failure. + */ + kfree(buf); + buf = NULL; + } + + qed_ptt_release(p_hwfn, p_ptt); + return 0; + +err1: + kfree(buf); +err0: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h index 50eb0b49950f..739ddb730967 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h @@ -37,4 +37,14 @@ int qed_selftest_register(struct qed_dev *cdev); * @return int */ int qed_selftest_clock(struct qed_dev *cdev); + +/** + * @brief qed_selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return int + */ +int qed_selftest_nvram(struct qed_dev *cdev); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index b2c08e4d2a9b..9c897bc68d05 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -110,8 +110,8 @@ union qed_spq_req_comp { }; struct qed_spq_comp_done { - u64 done; - u8 fw_return_code; + unsigned int done; + u8 fw_return_code; }; struct qed_spq_entry { diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 9fbaf9429fd0..019960b7855a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -35,7 +35,11 @@ ***************************************************************************/ #define SPQ_HIGH_PRI_RESERVE_DEFAULT (1) -#define SPQ_BLOCK_SLEEP_LENGTH (1000) + +#define SPQ_BLOCK_DELAY_MAX_ITER (10) +#define SPQ_BLOCK_DELAY_US (10) +#define SPQ_BLOCK_SLEEP_MAX_ITER (1000) +#define SPQ_BLOCK_SLEEP_MS (5) /*************************************************************************** * Blocking Imp. (BLOCK/EBLOCK mode) @@ -48,60 +52,88 @@ static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn, comp_done = (struct qed_spq_comp_done *)cookie; - comp_done->done = 0x1; - comp_done->fw_return_code = fw_return_code; + comp_done->fw_return_code = fw_return_code; - /* make update visible to waiting thread */ - smp_wmb(); + /* Make sure completion done is visible on waiting thread */ + smp_store_release(&comp_done->done, 0x1); } -static int qed_spq_block(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent, - u8 *p_fw_ret) +static int __qed_spq_block(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent, + u8 *p_fw_ret, bool sleep_between_iter) { - int sleep_count = SPQ_BLOCK_SLEEP_LENGTH; struct qed_spq_comp_done *comp_done; - int rc; + u32 iter_cnt; comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; - while (sleep_count) { - /* validate we receive completion update */ - smp_rmb(); - if (comp_done->done == 1) { + iter_cnt = sleep_between_iter ? SPQ_BLOCK_SLEEP_MAX_ITER + : SPQ_BLOCK_DELAY_MAX_ITER; + + while (iter_cnt--) { + /* Validate we receive completion update */ + if (READ_ONCE(comp_done->done) == 1) { + /* Read updated FW return value */ + smp_read_barrier_depends(); if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; return 0; } - usleep_range(5000, 10000); - sleep_count--; + + if (sleep_between_iter) + msleep(SPQ_BLOCK_SLEEP_MS); + else + udelay(SPQ_BLOCK_DELAY_US); + } + + return -EBUSY; +} + +static int qed_spq_block(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent, + u8 *p_fw_ret, bool skip_quick_poll) +{ + struct qed_spq_comp_done *comp_done; + int rc; + + /* A relatively short polling period w/o sleeping, to allow the FW to + * complete the ramrod and thus possibly to avoid the following sleeps. + */ + if (!skip_quick_poll) { + rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, false); + if (!rc) + return 0; } + /* Move to polling with a sleeping period between iterations */ + rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); + if (!rc) + return 0; + DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); rc = qed_mcp_drain(p_hwfn, p_hwfn->p_main_ptt); - if (rc != 0) + if (rc) { DP_NOTICE(p_hwfn, "MCP drain failed\n"); + goto err; + } /* Retry after drain */ - sleep_count = SPQ_BLOCK_SLEEP_LENGTH; - while (sleep_count) { - /* validate we receive completion update */ - smp_rmb(); - if (comp_done->done == 1) { - if (p_fw_ret) - *p_fw_ret = comp_done->fw_return_code; - return 0; - } - usleep_range(5000, 10000); - sleep_count--; - } + rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); + if (!rc) + return 0; + comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; if (comp_done->done == 1) { if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; return 0; } - - DP_NOTICE(p_hwfn, "Ramrod is stuck, MCP drain failed\n"); +err: + DP_NOTICE(p_hwfn, + "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", + le32_to_cpu(p_ent->elem.hdr.cid), + p_ent->elem.hdr.cmd_id, + p_ent->elem.hdr.protocol_id, + le16_to_cpu(p_ent->elem.hdr.echo)); return -EBUSY; } @@ -725,7 +757,8 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, * access p_ent here to see whether it's successful or not. * Thus, after gaining the answer perform the cleanup here. */ - rc = qed_spq_block(p_hwfn, p_ent, fw_return_code); + rc = qed_spq_block(p_hwfn, p_ent, fw_return_code, + p_ent->queue == &p_spq->unlimited_pending); if (p_ent->queue == &p_spq->unlimited_pending) { /* This is an allocated p_ent which does not need to diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index d2d6621fe0e5..f3f742a4e59a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -109,7 +109,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn, } static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, - int rel_vf_id, bool b_enabled_only) + int rel_vf_id, + bool b_enabled_only, bool b_non_malicious) { if (!p_hwfn->pf_iov_info) { DP_NOTICE(p_hwfn->cdev, "No iov info\n"); @@ -124,6 +125,10 @@ static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, b_enabled_only) return false; + if ((p_hwfn->pf_iov_info->vfs_array[rel_vf_id].b_malicious) && + b_non_malicious) + return false; + return true; } @@ -138,7 +143,8 @@ static struct qed_vf_info *qed_iov_get_vf_info(struct qed_hwfn *p_hwfn, return NULL; } - if (qed_iov_is_valid_vfid(p_hwfn, relative_vf_id, b_enabled_only)) + if (qed_iov_is_valid_vfid(p_hwfn, relative_vf_id, + b_enabled_only, false)) vf = &p_hwfn->pf_iov_info->vfs_array[relative_vf_id]; else DP_ERR(p_hwfn, "qed_iov_get_vf_info: VF[%d] is not enabled\n", @@ -542,7 +548,8 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn) return 0; } -static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid) +bool _qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, + int vfid, bool b_fail_malicious) { /* Check PF supports sriov */ if (IS_VF(p_hwfn->cdev) || !IS_QED_SRIOV(p_hwfn->cdev) || @@ -550,12 +557,17 @@ static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid) return false; /* Check VF validity */ - if (!qed_iov_is_valid_vfid(p_hwfn, vfid, true)) + if (!qed_iov_is_valid_vfid(p_hwfn, vfid, true, b_fail_malicious)) return false; return true; } +bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid) +{ + return _qed_iov_pf_sanity_check(p_hwfn, vfid, true); +} + static void qed_iov_set_vf_to_disable(struct qed_dev *cdev, u16 rel_vf_id, u8 to_disable) { @@ -652,6 +664,9 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn, qed_iov_vf_igu_reset(p_hwfn, p_ptt, vf); + /* It's possible VF was previously considered malicious */ + vf->b_malicious = false; + rc = qed_mcp_config_vf_msix(p_hwfn, p_ptt, vf->abs_vf_id, vf->num_sbs); if (rc) return rc; @@ -2804,6 +2819,13 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn, return rc; } + /* Workaround to make VF-PF channel ready, as FW + * doesn't do that as a part of FLR. + */ + REG_WR(p_hwfn, + GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_VF_PF_CHANNEL_READY_OFFSET(vfid), 1); + /* VF_STOPPED has to be set only after final cleanup * but prior to re-enabling the VF. */ @@ -2942,7 +2964,8 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, mbx->first_tlv = mbx->req_virt->first_tlv; /* check if tlv type is known */ - if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) { + if (qed_iov_tlv_supported(mbx->first_tlv.tl.type) && + !p_vf->b_malicious) { switch (mbx->first_tlv.tl.type) { case CHANNEL_TLV_ACQUIRE: qed_iov_vf_mbx_acquire(p_hwfn, p_ptt, p_vf); @@ -2984,6 +3007,15 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, qed_iov_vf_mbx_release(p_hwfn, p_ptt, p_vf); break; } + } else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF [%02x] - considered malicious; Ignoring TLV [%04x]\n", + p_vf->abs_vf_id, mbx->first_tlv.tl.type); + + qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf, + mbx->first_tlv.tl.type, + sizeof(struct pfvf_def_resp_tlv), + PFVF_STATUS_MALICIOUS); } else { /* unknown TLV - this may belong to a VF driver from the future * - a version written after this PF driver was written, which @@ -3033,20 +3065,30 @@ static void qed_iov_pf_get_and_clear_pending_events(struct qed_hwfn *p_hwfn, memset(p_pending_events, 0, sizeof(u64) * QED_VF_ARRAY_LENGTH); } -static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn, - u16 abs_vfid, struct regpair *vf_msg) +static struct qed_vf_info *qed_sriov_get_vf_from_absid(struct qed_hwfn *p_hwfn, + u16 abs_vfid) { - u8 min = (u8)p_hwfn->cdev->p_iov_info->first_vf_in_pf; - struct qed_vf_info *p_vf; + u8 min = (u8) p_hwfn->cdev->p_iov_info->first_vf_in_pf; - if (!qed_iov_pf_sanity_check(p_hwfn, (int)abs_vfid - min)) { + if (!_qed_iov_pf_sanity_check(p_hwfn, (int)abs_vfid - min, false)) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, - "Got a message from VF [abs 0x%08x] that cannot be handled by PF\n", + "Got indication for VF [abs 0x%08x] that cannot be handled by PF\n", abs_vfid); - return 0; + return NULL; } - p_vf = &p_hwfn->pf_iov_info->vfs_array[(u8)abs_vfid - min]; + + return &p_hwfn->pf_iov_info->vfs_array[(u8) abs_vfid - min]; +} + +static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn, + u16 abs_vfid, struct regpair *vf_msg) +{ + struct qed_vf_info *p_vf = qed_sriov_get_vf_from_absid(p_hwfn, + abs_vfid); + + if (!p_vf) + return 0; /* List the physical address of the request so that handler * could later on copy the message from it. @@ -3060,6 +3102,23 @@ static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn, return 0; } +static void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn, + struct malicious_vf_eqe_data *p_data) +{ + struct qed_vf_info *p_vf; + + p_vf = qed_sriov_get_vf_from_absid(p_hwfn, p_data->vf_id); + + if (!p_vf) + return; + + DP_INFO(p_hwfn, + "VF [%d] - Malicious behavior [%02x]\n", + p_vf->abs_vf_id, p_data->err_id); + + p_vf->b_malicious = true; +} + int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo, union event_ring_data *data) { @@ -3067,6 +3126,9 @@ int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, case COMMON_EVENT_VF_PF_CHANNEL: return qed_sriov_vfpf_msg(p_hwfn, le16_to_cpu(echo), &data->vf_pf_channel.msg_addr); + case COMMON_EVENT_MALICIOUS_VF: + qed_sriov_vfpf_malicious(p_hwfn, &data->malicious_vf); + return 0; default: DP_INFO(p_hwfn->cdev, "Unknown sriov eqe event 0x%02x\n", opcode); @@ -3083,7 +3145,7 @@ u16 qed_iov_get_next_active_vf(struct qed_hwfn *p_hwfn, u16 rel_vf_id) goto out; for (i = rel_vf_id; i < p_iov->total_vfs; i++) - if (qed_iov_is_valid_vfid(p_hwfn, rel_vf_id, true)) + if (qed_iov_is_valid_vfid(p_hwfn, rel_vf_id, true, false)) return i; out: @@ -3130,6 +3192,12 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, return; } + if (vf_info->b_malicious) { + DP_NOTICE(p_hwfn->cdev, + "Can't set forced MAC to malicious VF [%d]\n", vfid); + return; + } + feature = 1 << MAC_ADDR_FORCED; memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN); @@ -3153,6 +3221,12 @@ static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, return; } + if (vf_info->b_malicious) { + DP_NOTICE(p_hwfn->cdev, + "Can't set forced vlan to malicious VF [%d]\n", vfid); + return; + } + feature = 1 << VLAN_ADDR_FORCED; vf_info->bulletin.p_virt->pvid = pvid; if (pvid) @@ -3367,7 +3441,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) qed_for_each_vf(hwfn, j) { int k; - if (!qed_iov_is_valid_vfid(hwfn, j, true)) + if (!qed_iov_is_valid_vfid(hwfn, j, true, false)) continue; /* Wait until VF is disabled before releasing */ @@ -3396,7 +3470,6 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) static int qed_sriov_enable(struct qed_dev *cdev, int num) { - struct qed_sb_cnt_info sb_cnt_info; int i, j, rc; if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) { @@ -3409,7 +3482,11 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) for_each_hwfn(cdev, j) { struct qed_hwfn *hwfn = &cdev->hwfns[j]; struct qed_ptt *ptt = qed_ptt_acquire(hwfn); - int num_sbs = 0, limit = 16; + int num_queues; + + /* Make sure not to use more than 16 queues per VF */ + num_queues = min_t(int, + FEAT_NUM(hwfn, QED_VF_L2_QUE) / num, 16); if (!ptt) { DP_ERR(hwfn, "Failed to acquire ptt\n"); @@ -3417,19 +3494,11 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) goto err; } - if (IS_MF_DEFAULT(hwfn)) - limit = MAX_NUM_VFS_BB / hwfn->num_funcs_on_engine; - - memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); - qed_int_get_num_sbs(hwfn, &sb_cnt_info); - num_sbs = min_t(int, sb_cnt_info.sb_free_blk, limit); - for (i = 0; i < num; i++) { - if (!qed_iov_is_valid_vfid(hwfn, i, false)) + if (!qed_iov_is_valid_vfid(hwfn, i, false, true)) continue; - rc = qed_iov_init_hw_for_vf(hwfn, - ptt, i, num_sbs / num); + rc = qed_iov_init_hw_for_vf(hwfn, ptt, i, num_queues); if (rc) { DP_ERR(cdev, "Failed to enable VF[%d]\n", i); qed_ptt_release(hwfn, ptt); @@ -3477,7 +3546,7 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid) return -EINVAL; } - if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true)) { + if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true, true)) { DP_VERBOSE(cdev, QED_MSG_IOV, "Cannot set VF[%d] MAC (VF is not active)\n", vfid); return -EINVAL; @@ -3509,7 +3578,7 @@ static int qed_sriov_pf_set_vlan(struct qed_dev *cdev, u16 vid, int vfid) return -EINVAL; } - if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true)) { + if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true, true)) { DP_VERBOSE(cdev, QED_MSG_IOV, "Cannot set VF[%d] MAC (VF is not active)\n", vfid); return -EINVAL; @@ -3543,7 +3612,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, if (IS_VF(cdev)) return -EINVAL; - if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true)) { + if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true, false)) { DP_VERBOSE(cdev, QED_MSG_IOV, "VF index [%d] isn't active\n", vf_id); return -EINVAL; @@ -3647,7 +3716,7 @@ static int qed_set_vf_link_state(struct qed_dev *cdev, if (IS_VF(cdev)) return -EINVAL; - if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true)) { + if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true, true)) { DP_VERBOSE(cdev, QED_MSG_IOV, "VF index [%d] isn't active\n", vf_id); return -EINVAL; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index 0dd23e409b3f..3cf515b1b427 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -132,6 +132,7 @@ struct qed_vf_info { struct qed_iov_vf_mbx vf_mbx; enum vf_state state; bool b_init; + bool b_malicious; u8 to_disable; struct qed_bulletin bulletin; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index abf5bf11f865..3c0633642f4c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1171,6 +1171,13 @@ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn, u8 *num_vlan_filters) *num_vlan_filters = p_vf->acquire_resp.resc.num_vlan_filters; } +void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters) +{ + struct qed_vf_iov *p_vf = p_hwfn->vf_iov_info; + + *num_mac_filters = p_vf->acquire_resp.resc.num_mac_filters; +} + bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac) { struct qed_bulletin_content *bulletin; @@ -1230,8 +1237,8 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) is_mac_exist = qed_vf_bulletin_get_forced_mac(hwfn, mac, &is_mac_forced); - if (is_mac_exist && is_mac_forced && cookie) - ops->force_mac(cookie, mac); + if (is_mac_exist && cookie) + ops->force_mac(cookie, mac, !!is_mac_forced); /* Always update link configuration according to bulletin */ qed_link_update(hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 35db7a28aa13..325c250d4ee5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -40,6 +40,7 @@ enum { PFVF_STATUS_NOT_SUPPORTED, PFVF_STATUS_NO_RESOURCE, PFVF_STATUS_FORCED, + PFVF_STATUS_MALICIOUS, }; /* vf pf channel tlvs */ @@ -622,6 +623,14 @@ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn, u8 *num_vlan_filters); /** + * @brief Get number of MAC filters allocated for VF by qed + * + * @param p_hwfn + * @param num_rxqs - allocated MAC filters + */ +void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters); + +/** * @brief Check if VF can set a MAC address * * @param p_hwfn @@ -871,6 +880,11 @@ static inline void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn, { } +static inline void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, + u8 *num_mac_filters) +{ +} + static inline bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac) { return false; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 974689a13337..0cba21bf9d5f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -193,6 +193,8 @@ struct qede_dev { u16 vxlan_dst_port; u16 geneve_dst_port; + bool wol_enabled; + struct qede_rdma_dev rdma_info; }; @@ -320,6 +322,7 @@ struct qede_fastpath { #define XMIT_L4_CSUM BIT(0) #define XMIT_LSO BIT(1) #define XMIT_ENC BIT(2) +#define XMIT_ENC_GSO_L4_CSUM BIT(3) #define QEDE_CSUM_ERROR BIT(0) #define QEDE_CSUM_UNNECESSARY BIT(1) @@ -362,8 +365,9 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX -#define QEDE_MIN_PKT_LEN 64 -#define QEDE_RX_HDR_SIZE 256 +#define QEDE_MIN_PKT_LEN 64 +#define QEDE_RX_HDR_SIZE 256 +#define QEDE_MAX_JUMBO_PACKET_SIZE 9600 #define for_each_queue(i) for (i = 0; i < edev->num_queues; i++) #endif /* _QEDE_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7567cc464b88..8a3debef39ee 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -157,6 +157,7 @@ enum qede_ethtool_tests { QEDE_ETHTOOL_MEMORY_TEST, QEDE_ETHTOOL_REGISTER_TEST, QEDE_ETHTOOL_CLOCK_TEST, + QEDE_ETHTOOL_NVRAM_TEST, QEDE_ETHTOOL_TEST_MAX }; @@ -166,6 +167,7 @@ static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = { "Memory (online)\t\t", "Register (online)\t", "Clock (online)\t\t", + "Nvram (online)\t\t", }; static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) @@ -325,7 +327,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = { { \ int i; \ \ - for (i = 0; i < QED_LM_COUNT; i++) { \ + for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) { \ if ((caps) & (qed_lm_map[i].qed_link_mode)) \ __set_bit(qed_lm_map[i].ethtool_link_mode,\ lk_ksettings->link_modes.name); \ @@ -336,7 +338,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = { { \ int i; \ \ - for (i = 0; i < QED_LM_COUNT; i++) { \ + for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) { \ if (test_bit(qed_lm_map[i].ethtool_link_mode, \ lk_ksettings->link_modes.name)) \ caps |= qed_lm_map[i].qed_link_mode; \ @@ -488,6 +490,45 @@ static void qede_get_drvinfo(struct net_device *ndev, strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info)); } +static void qede_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct qede_dev *edev = netdev_priv(ndev); + + if (edev->dev_info.common.wol_support) { + wol->supported = WAKE_MAGIC; + wol->wolopts = edev->wol_enabled ? WAKE_MAGIC : 0; + } +} + +static int qede_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct qede_dev *edev = netdev_priv(ndev); + bool wol_requested; + int rc; + + if (wol->wolopts & ~WAKE_MAGIC) { + DP_INFO(edev, + "Can't support WoL options other than magic-packet\n"); + return -EINVAL; + } + + wol_requested = !!(wol->wolopts & WAKE_MAGIC); + if (wol_requested == edev->wol_enabled) + return 0; + + /* Need to actually change configuration */ + if (!edev->dev_info.common.wol_support) { + DP_INFO(edev, "Device doesn't support WoL\n"); + return -EINVAL; + } + + rc = edev->ops->common->update_wol(edev->cdev, wol_requested); + if (!rc) + edev->wol_enabled = wol_requested; + + return rc; +} + static u32 qede_get_msglevel(struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); @@ -730,19 +771,11 @@ static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) } /* Netdevice NDOs */ -#define ETH_MAX_JUMBO_PACKET_SIZE 9600 -#define ETH_MIN_PACKET_SIZE 60 int qede_change_mtu(struct net_device *ndev, int new_mtu) { struct qede_dev *edev = netdev_priv(ndev); union qede_reload_args args; - if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) || - ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE)) { - DP_ERR(edev, "Can't support requested MTU size\n"); - return -EINVAL; - } - DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "Configuring MTU size of %d\n", new_mtu); @@ -754,6 +787,8 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) qede_update_mtu(edev, &args); + edev->ops->common->update_mtu(edev->cdev, args.mtu); + return 0; } @@ -1405,6 +1440,11 @@ static void qede_self_test(struct net_device *dev, buf[QEDE_ETHTOOL_CLOCK_TEST] = 1; etest->flags |= ETH_TEST_FL_FAILED; } + + if (edev->ops->common->selftest->selftest_nvram(edev->cdev)) { + buf[QEDE_ETHTOOL_NVRAM_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } } static int qede_set_tunable(struct net_device *dev, @@ -1455,6 +1495,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_drvinfo = qede_get_drvinfo, .get_regs_len = qede_get_regs_len, .get_regs = qede_get_regs, + .get_wol = qede_get_wol, + .set_wol = qede_set_wol, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .nway_reset = qede_nway_reset, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 85f46dbecd5b..b84a2c4ef083 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -95,6 +95,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id); #define TX_TIMEOUT (5 * HZ) static void qede_remove(struct pci_dev *pdev); +static void qede_shutdown(struct pci_dev *pdev); static int qede_alloc_rx_buffer(struct qede_dev *edev, struct qede_rx_queue *rxq); static void qede_link_update(void *dev, struct qed_link_output *link); @@ -166,15 +167,20 @@ static struct pci_driver qede_pci_driver = { .id_table = qede_pci_tbl, .probe = qede_probe, .remove = qede_remove, + .shutdown = qede_shutdown, #ifdef CONFIG_QED_SRIOV .sriov_configure = qede_sriov_configure, #endif }; -static void qede_force_mac(void *dev, u8 *mac) +static void qede_force_mac(void *dev, u8 *mac, bool forced) { struct qede_dev *edev = dev; + /* MAC hints take effect only if we haven't set one already */ + if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) + return; + ether_addr_copy(edev->ndev->dev_addr, mac); ether_addr_copy(edev->primary_mac, mac); } @@ -396,8 +402,19 @@ static u32 qede_xmit_type(struct qede_dev *edev, (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) *ipv6_ext = 1; - if (skb->encapsulation) + if (skb->encapsulation) { rc |= XMIT_ENC; + if (skb_is_gso(skb)) { + unsigned short gso_type = skb_shinfo(skb)->gso_type; + + if ((gso_type & SKB_GSO_UDP_TUNNEL_CSUM) || + (gso_type & SKB_GSO_GRE_CSUM)) + rc |= XMIT_ENC_GSO_L4_CSUM; + + rc |= XMIT_LSO; + return rc; + } + } if (skb_is_gso(skb)) rc |= XMIT_LSO; @@ -633,6 +650,12 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb, if (unlikely(xmit_type & XMIT_ENC)) { first_bd->data.bd_flags.bitfields |= 1 << ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT; + + if (xmit_type & XMIT_ENC_GSO_L4_CSUM) { + u8 tmp = ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT; + + first_bd->data.bd_flags.bitfields |= 1 << tmp; + } hlen = qede_get_skb_hlen(skb, true); } else { first_bd->data.bd_flags.bitfields |= @@ -2218,6 +2241,40 @@ static void qede_udp_tunnel_del(struct net_device *dev, schedule_delayed_work(&edev->sp_task, 0); } +/* 8B udp header + 8B base tunnel header + 32B option length */ +#define QEDE_MAX_TUN_HDR_LEN 48 + +static netdev_features_t qede_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb->encapsulation) { + u8 l4_proto = 0; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_hdr(skb)->nexthdr; + break; + default: + return features; + } + + /* Disable offloads for geneve tunnels, as HW can't parse + * the geneve header which has option length greater than 32B. + */ + if ((l4_proto == IPPROTO_UDP) && + ((skb_inner_mac_header(skb) - + skb_transport_header(skb)) > QEDE_MAX_TUN_HDR_LEN)) + return features & ~(NETIF_F_CSUM_MASK | + NETIF_F_GSO_MASK); + } + + return features; +} + static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, @@ -2242,6 +2299,7 @@ static const struct net_device_ops qede_netdev_ops = { #endif .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, + .ndo_features_check = qede_features_check, }; /* ------------------------------------------------------------------------- @@ -2308,6 +2366,8 @@ static void qede_init_ndev(struct qede_dev *edev) qede_set_ethtool_ops(ndev); + ndev->priv_flags |= IFF_UNICAST_FLT; + /* user-changeble features */ hw_features = NETIF_F_GRO | NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -2315,11 +2375,14 @@ static void qede_init_ndev(struct qede_dev *edev) /* Encap features*/ hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_TSO_ECN; + NETIF_F_TSO_ECN | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM; ndev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM; + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM; ndev->vlan_features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM | NETIF_F_HIGHDMA; @@ -2329,8 +2392,14 @@ static void qede_init_ndev(struct qede_dev *edev) ndev->hw_features = hw_features; + /* MTU range: 46 - 9600 */ + ndev->min_mtu = ETH_ZLEN - ETH_HLEN; + ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE; + /* Set network device HW mac */ ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac); + + ndev->mtu = edev->dev_info.common.mtu; } /* This function converts from 32b param to two params of level and module @@ -2638,6 +2707,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) /* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); + if (system_state == SYSTEM_POWER_OFF) + return; qed_ops->common->remove(cdev); dev_info(&pdev->dev, "Ending qede_remove successfully\n"); @@ -2648,6 +2719,11 @@ static void qede_remove(struct pci_dev *pdev) __qede_remove(pdev, QEDE_REMOVE_NORMAL); } +static void qede_shutdown(struct pci_dev *pdev) +{ + __qede_remove(pdev, QEDE_REMOVE_NORMAL); +} + /* ------------------------------------------------------------------------- * START OF LOAD / UNLOAD * ------------------------------------------------------------------------- @@ -3686,6 +3762,8 @@ static int qede_open(struct net_device *ndev) udp_tunnel_get_rx_info(ndev); + edev->ops->common->update_drv_state(edev->cdev, true); + return 0; } @@ -3695,6 +3773,8 @@ static int qede_close(struct net_device *ndev) qede_unload(edev, QEDE_UNLOAD_NORMAL); + edev->ops->common->update_drv_state(edev->cdev, false); + return 0; } @@ -3755,6 +3835,8 @@ static int qede_set_mac_addr(struct net_device *ndev, void *p) if (rc) return rc; + edev->ops->common->update_mac(edev->cdev, addr->sa_data); + /* Add MAC filter according to the new unicast HW MAC address */ ether_addr_copy(edev->primary_mac, ndev->dev_addr); return qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_ADD, @@ -3877,7 +3959,7 @@ static void qede_config_rx_mode(struct net_device *ndev) /* Check for promiscuous */ if ((ndev->flags & IFF_PROMISC) || - (uc_count > 15)) { /* @@@TBD resource allocation - 1 */ + (uc_count > edev->dev_info.num_mac_filters - 1)) { accept_flags = QED_FILTER_RX_MODE_TYPE_PROMISC; } else { /* Add MAC filters according to the unicast secondary macs */ diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b09a6b80d107..5c100ab86c00 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3755,7 +3755,6 @@ static const struct net_device_ops ql3xxx_netdev_ops = { .ndo_open = ql3xxx_open, .ndo_start_xmit = ql3xxx_send, .ndo_stop = ql3xxx_close, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ql3xxx_set_mac_address, .ndo_tx_timeout = ql3xxx_tx_timeout, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 509b596cf1e8..838cc0ceafd8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -1024,12 +1024,6 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu) struct qlcnic_adapter *adapter = netdev_priv(netdev); int rc = 0; - if (mtu < P3P_MIN_MTU || mtu > P3P_MAX_MTU) { - dev_err(&adapter->netdev->dev, "%d bytes < mtu < %d bytes" - " not supported\n", P3P_MAX_MTU, P3P_MIN_MTU); - return -EINVAL; - } - rc = qlcnic_fw_cmd_set_mtu(adapter, mtu); if (!rc) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 3ae3968b0edf..4c0cce962585 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2342,6 +2342,10 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, netdev->priv_flags |= IFF_UNICAST_FLT; netdev->irq = adapter->msix_entries[0].vector; + /* MTU range: 68 - 9600 */ + netdev->min_mtu = P3P_MIN_MTU; + netdev->max_mtu = P3P_MAX_MTU; + err = qlcnic_set_real_num_queues(adapter, adapter->drv_tx_rings, adapter->drv_sds_rings); if (err) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index fd4a8e473f11..1409412ab39d 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4788,6 +4788,13 @@ static int qlge_probe(struct pci_dev *pdev, ndev->ethtool_ops = &qlge_ethtool_ops; ndev->watchdog_timeo = 10 * HZ; + /* MTU range: this driver only supports 1500 or 9000, so this only + * filters out values above or below, and we'll rely on + * qlge_change_mtu to make sure only 1500 or 9000 are allowed + */ + ndev->min_mtu = ETH_DATA_LEN; + ndev->max_mtu = 9000; + err = register_netdev(ndev); if (err) { dev_err(&pdev->dev, "net device registration failed.\n"); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 4fede4b86538..8be526af659a 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -239,15 +239,8 @@ static void emac_rx_mode_set(struct net_device *netdev) /* Change the Maximum Transfer Unit (MTU) */ static int emac_change_mtu(struct net_device *netdev, int new_mtu) { - unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct emac_adapter *adpt = netdev_priv(netdev); - if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) || - (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) { - netdev_err(adpt->netdev, "error: invalid MTU setting\n"); - return -EINVAL; - } - netif_info(adpt, hw, adpt->netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -680,6 +673,12 @@ static int emac_probe(struct platform_device *pdev) netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + /* MTU range: 46 - 9194 */ + netdev->min_mtu = EMAC_MIN_ETH_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + netdev->max_mtu = EMAC_MAX_ETH_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + INIT_WORK(&adpt->work_thread, emac_work_thread); /* Initialize queues */ diff --git a/drivers/net/ethernet/qualcomm/qca_framing.h b/drivers/net/ethernet/qualcomm/qca_framing.h index 5d965959c978..d5e795dcdf47 100644 --- a/drivers/net/ethernet/qualcomm/qca_framing.h +++ b/drivers/net/ethernet/qualcomm/qca_framing.h @@ -43,9 +43,9 @@ /* Frame length is invalid */ #define QCAFRM_INVFRAME (QCAFRM_ERR_BASE - 4) -/* Min/Max Ethernet MTU */ -#define QCAFRM_ETHMINMTU 46 -#define QCAFRM_ETHMAXMTU 1500 +/* Min/Max Ethernet MTU: 46/1500 */ +#define QCAFRM_ETHMINMTU (ETH_ZLEN - ETH_HLEN) +#define QCAFRM_ETHMAXMTU ETH_DATA_LEN /* Min/Max frame lengths */ #define QCAFRM_ETHMINLEN (QCAFRM_ETHMINMTU + ETH_HLEN) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 6e2add979471..513e6c74e199 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -780,24 +780,12 @@ qcaspi_netdev_uninit(struct net_device *dev) dev_kfree_skb(qca->rx_skb); } -static int -qcaspi_netdev_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < QCAFRM_ETHMINMTU) || (new_mtu > QCAFRM_ETHMAXMTU)) - return -EINVAL; - - dev->mtu = new_mtu; - - return 0; -} - static const struct net_device_ops qcaspi_netdev_ops = { .ndo_init = qcaspi_netdev_init, .ndo_uninit = qcaspi_netdev_uninit, .ndo_open = qcaspi_netdev_open, .ndo_stop = qcaspi_netdev_close, .ndo_start_xmit = qcaspi_netdev_xmit, - .ndo_change_mtu = qcaspi_netdev_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_tx_timeout = qcaspi_netdev_tx_timeout, .ndo_validate_addr = eth_validate_addr, @@ -814,6 +802,10 @@ qcaspi_netdev_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->tx_queue_len = 100; + /* MTU range: 46 - 1500 */ + dev->min_mtu = QCAFRM_ETHMINMTU; + dev->max_mtu = QCAFRM_ETHMAXMTU; + qca = netdev_priv(dev); memset(qca, 0, sizeof(struct qcaspi)); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 5ef5d728c250..4ff4e0491406 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -969,7 +969,6 @@ static const struct net_device_ops r6040_netdev_ops = { .ndo_start_xmit = r6040_start_xmit, .ndo_get_stats = r6040_get_stats, .ndo_set_rx_mode = r6040_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_do_ioctl = r6040_ioctl, diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 5297bf77211c..b7c89ebcf4a2 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1277,10 +1277,6 @@ static int cp_change_mtu(struct net_device *dev, int new_mtu) { struct cp_private *cp = netdev_priv(dev); - /* check for invalid MTU, according to hardware limits */ - if (new_mtu < CP_MIN_MTU || new_mtu > CP_MAX_MTU) - return -EINVAL; - /* if network interface not up, no need for complexity */ if (!netif_running(dev)) { dev->mtu = new_mtu; @@ -2010,6 +2006,10 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; + /* MTU range: 60 - 4096 */ + dev->min_mtu = CP_MIN_MTU; + dev->max_mtu = CP_MAX_MTU; + rc = register_netdev(dev); if (rc) goto err_out_iomap; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index da4c2d8a4173..9bc047ac883b 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -924,19 +924,10 @@ static int rtl8139_set_features(struct net_device *dev, netdev_features_t featur return 0; } -static int rtl8139_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > MAX_ETH_DATA_SIZE) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops rtl8139_netdev_ops = { .ndo_open = rtl8139_open, .ndo_stop = rtl8139_close, .ndo_get_stats64 = rtl8139_get_stats64, - .ndo_change_mtu = rtl8139_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = rtl8139_set_mac_address, .ndo_start_xmit = rtl8139_start_xmit, @@ -1022,6 +1013,10 @@ static int rtl8139_init_one(struct pci_dev *pdev, dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; + /* MTU range: 68 - 1770 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = MAX_ETH_DATA_SIZE; + /* tp zeroed and aligned in alloc_etherdev */ tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 5cb96785fb63..570ed3bd3cbf 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -245,7 +245,6 @@ static const struct net_device_ops atp_netdev_ops = { .ndo_start_xmit = atp_send_packet, .ndo_set_rx_mode = set_rx_mode, .ndo_tx_timeout = tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index bf000d819a21..2830190aaace 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6673,10 +6673,6 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) { struct rtl8169_private *tp = netdev_priv(dev); - if (new_mtu < ETH_ZLEN || - new_mtu > rtl_chip_infos[tp->mac_version].jumbo_max) - return -EINVAL; - if (new_mtu > ETH_DATA_LEN) rtl_hw_jumbo_enable(tp); else @@ -8431,6 +8427,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; + /* MTU range: 60 - hw-specific max */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = rtl_chip_infos[chipset].jumbo_max; + tp->hw_start = cfg->hw_start; tp->event_slow = cfg->event_slow; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 630536bc72f9..27cfec3154c8 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1780,7 +1780,6 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_do_ioctl = ravb_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, }; /* MDIO bus init function */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 05b0dc55de77..e443695c2757 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2914,7 +2914,6 @@ static const struct net_device_ops sh_eth_netdev_ops = { .ndo_do_ioctl = sh_eth_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, }; static const struct net_device_ops sh_eth_netdev_ops_tsu = { @@ -2929,7 +2928,6 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = { .ndo_do_ioctl = sh_eth_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = eth_change_mtu, }; #ifdef CONFIG_OF diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 24b746406bc7..67df4cf93362 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1953,12 +1953,6 @@ static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) int running = netif_running(dev); int err; -#define ROCKER_PORT_MIN_MTU 68 -#define ROCKER_PORT_MAX_MTU 9000 - - if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU) - return -EINVAL; - if (running) rocker_port_stop(dev); @@ -2536,9 +2530,11 @@ static void rocker_port_dev_addr_init(struct rocker_port *rocker_port) } } +#define ROCKER_PORT_MIN_MTU ETH_MIN_MTU +#define ROCKER_PORT_MAX_MTU 9000 static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) { - const struct pci_dev *pdev = rocker->pdev; + struct pci_dev *pdev = rocker->pdev; struct rocker_port *rocker_port; struct net_device *dev; int err; @@ -2546,6 +2542,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) dev = alloc_etherdev(sizeof(struct rocker_port)); if (!dev) return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); rocker_port = netdev_priv(dev); rocker_port->dev = dev; rocker_port->rocker = rocker; @@ -2570,6 +2567,10 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG; + /* MTU range: 68 - 9000 */ + dev->min_mtu = ROCKER_PORT_MIN_MTU; + dev->max_mtu = ROCKER_PORT_MAX_MTU; + err = rocker_world_port_pre_init(rocker_port); if (err) { dev_err(&pdev->dev, "port world pre-init failed\n"); @@ -2839,20 +2840,37 @@ static bool rocker_port_dev_check_under(const struct net_device *dev, return true; } +struct rocker_walk_data { + struct rocker *rocker; + struct rocker_port *port; +}; + +static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) +{ + struct rocker_walk_data *data = _data; + int ret = 0; + + if (rocker_port_dev_check_under(lower_dev, data->rocker)) { + data->port = netdev_priv(lower_dev); + ret = 1; + } + + return ret; +} + struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, struct rocker *rocker) { - struct net_device *lower_dev; - struct list_head *iter; + struct rocker_walk_data data; if (rocker_port_dev_check_under(dev, rocker)) return netdev_priv(dev); - netdev_for_each_all_lower_dev(dev, lower_dev, iter) { - if (rocker_port_dev_check_under(lower_dev, rocker)) - return netdev_priv(lower_dev); - } - return NULL; + data.rocker = rocker; + data.port = NULL; + netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data); + + return data.port; } static int rocker_netdevice_event(struct notifier_block *unused, diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index ea44a2456ce1..5dbe40640da6 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1820,19 +1820,6 @@ static int sxgbe_set_features(struct net_device *dev, */ static int sxgbe_change_mtu(struct net_device *dev, int new_mtu) { - /* RFC 791, page 25, "Every internet module must be able to forward - * a datagram of 68 octets without further fragmentation." - */ - if (new_mtu < MIN_MTU || (new_mtu > MAX_MTU)) { - netdev_err(dev, "invalid MTU, MTU should be in between %d and %d\n", - MIN_MTU, MAX_MTU); - return -EINVAL; - } - - /* Return if the buffer sizes will not change */ - if (dev->mtu == new_mtu) - return 0; - dev->mtu = new_mtu; if (!netif_running(dev)) @@ -2144,6 +2131,10 @@ struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device, /* assign filtering support */ ndev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 68 - 9000 */ + ndev->min_mtu = MIN_MTU; + ndev->max_mtu = MAX_MTU; + priv->msg_enable = netif_msg_init(debug, default_msg_level); /* Enable TCP segmentation offload for all DMA channels */ diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index bdac936a68bc..244c1e171017 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -745,7 +745,6 @@ static const struct net_device_ops ether3_netdev_ops = { .ndo_set_rx_mode = ether3_setmulticastlist, .ndo_tx_timeout = ether3_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index c2bd5378ffda..ed34196028b8 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -714,7 +714,6 @@ static const struct net_device_ops sgiseeq_netdev_ops = { .ndo_tx_timeout = timeout, .ndo_set_rx_mode = sgiseeq_set_multicast, .ndo_set_mac_address = sgiseeq_set_mac_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 00279da6a1e8..e61807e6d47b 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2245,6 +2245,86 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) } } +#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\ + 1 << RSS_MODE_HASH_DST_ADDR_LBN) +#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\ + 1 << RSS_MODE_HASH_DST_PORT_LBN) +#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) + +static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags) +{ + /* Firmware had a bug (sfc bug 61952) where it would not actually + * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS. + * This meant that it would always contain whatever was previously + * in the MCDI buffer. Fortunately, all firmware versions with + * this bug have the same default flags value for a newly-allocated + * RSS context, and the only time we want to get the flags is just + * after allocating. Moreover, the response has a 32-bit hole + * where the context ID would be in the request, so we can use an + * overlength buffer in the request and pre-fill the flags field + * with what we believe the default to be. Thus if the firmware + * has the bug, it will leave our pre-filled value in the flags + * field of the response, and we will get the right answer. + * + * However, this does mean that this function should NOT be used if + * the RSS context flags might not be their defaults - it is ONLY + * reliably correct for a newly-allocated RSS context. + */ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + size_t outlen; + int rc; + + /* Check we have a hole for the context ID */ + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS, + RSS_CONTEXT_FLAGS_DEFAULT); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc == 0) { + if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN) + rc = -EIO; + else + *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS); + } + return rc; +} + +/* Attempt to enable 4-tuple UDP hashing on the specified RSS context. + * If we fail, we just leave the RSS context at its default hash settings, + * which is safe but may slightly reduce performance. + * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we + * just need to set the UDP ports flags (for both IP versions). + */ +static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); + u32 flags; + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0); + + if (efx_ef10_get_rss_flags(efx, context, &flags) != 0) + return; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context); + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN; + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags); + if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf), + NULL, 0, NULL)) + /* Succeeded, so UDP 4-tuple is now enabled */ + efx->rx_hash_udp_4tuple = true; +} + static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, bool exclusive, unsigned *context_size) { @@ -2290,6 +2370,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, if (context_size) *context_size = rss_spread; + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN) + efx_ef10_set_rss_flags(efx, *context); + return 0; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 6b89e4a7b164..649bc508fa4b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2266,18 +2266,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) rc = efx_check_disabled(efx); if (rc) return rc; - if (new_mtu > EFX_MAX_MTU) { - netif_err(efx, drv, efx->net_dev, - "Requested MTU of %d too big (max: %d)\n", - new_mtu, EFX_MAX_MTU); - return -EINVAL; - } - if (new_mtu < EFX_MIN_MTU) { - netif_err(efx, drv, efx->net_dev, - "Requested MTU of %d too small (min: %d)\n", - new_mtu, EFX_MIN_MTU); - return -EINVAL; - } netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); @@ -2481,6 +2469,8 @@ static int efx_register_netdev(struct efx_nic *efx) net_dev->priv_flags |= IFF_UNICAST_FLT; net_dev->ethtool_ops = &efx_ethtool_ops; net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; + net_dev->min_mtu = EFX_MIN_MTU; + net_dev->max_mtu = EFX_MAX_MTU; rtnl_lock(); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 445ccdb6bc67..bf126f935ade 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -968,20 +968,24 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, info->data = 0; switch (info->flow_type) { + case UDP_V4_FLOW: + if (efx->rx_hash_udp_4tuple) + /* fall through */ case TCP_V4_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; /* fall through */ - case UDP_V4_FLOW: case SCTP_V4_FLOW: case AH_ESP_V4_FLOW: case IPV4_FLOW: info->data |= RXH_IP_SRC | RXH_IP_DST; min_revision = EFX_REV_FALCON_B0; break; + case UDP_V6_FLOW: + if (efx->rx_hash_udp_4tuple) + /* fall through */ case TCP_V6_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; /* fall through */ - case UDP_V6_FLOW: case SCTP_V6_FLOW: case AH_ESP_V6_FLOW: case IPV6_FLOW: diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 99d8c82124bb..fec51c4b2607 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -853,6 +853,7 @@ struct vfdi_status; * @rx_hash_key: Toeplitz hash key for RSS * @rx_indir_table: Indirection table for RSS * @rx_scatter: Scatter mode enabled for receives + * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will @@ -990,6 +991,7 @@ struct efx_nic { u8 rx_hash_key[40]; u32 rx_indir_table[128]; bool rx_scatter; + bool rx_hash_udp_4tuple; unsigned int_error_count; unsigned long int_error_expire; diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 7a254da85dd7..42051ab98cf0 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -1225,7 +1225,6 @@ static const struct net_device_ops ioc3_netdev_ops = { .ndo_do_ioctl = ioc3_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ioc3_set_mac_address, - .ndo_change_mtu = eth_change_mtu, }; static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index aaa80f13859b..69d2d30e5ef1 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -815,7 +815,6 @@ static const struct net_device_ops meth_netdev_ops = { .ndo_start_xmit = meth_tx, .ndo_do_ioctl = meth_ioctl, .ndo_tx_timeout = meth_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_rx_mode = meth_set_rx_mode, diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index 7426f8b21252..6c2e2b311c16 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1386,7 +1386,6 @@ static const struct net_device_ops sc92031_netdev_ops = { .ndo_open = sc92031_open, .ndo_stop = sc92031_stop, .ndo_set_rx_mode = sc92031_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_tx_timeout = sc92031_tx_timeout, diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 27be6c869315..210e35d079dd 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1833,7 +1833,6 @@ static const struct net_device_ops sis190_netdev_ops = { .ndo_start_xmit = sis190_start_xmit, .ndo_tx_timeout = sis190_tx_timeout, .ndo_set_rx_mode = sis190_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = sis190_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 6f85276376e8..39fca6c0b68d 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -400,7 +400,6 @@ static const struct net_device_ops sis900_netdev_ops = { .ndo_start_xmit = sis900_start_xmit, .ndo_set_config = sis900_set_config, .ndo_set_rx_mode = set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_do_ioctl = mii_ioctl, diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 7186b89269ad..fe9760ffab51 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -313,7 +313,6 @@ static const struct net_device_ops epic_netdev_ops = { .ndo_get_stats = epic_get_stats, .ndo_set_rx_mode = set_rx_mode, .ndo_do_ioctl = netdev_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index cb49c9654f0a..4f19c6166182 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1753,7 +1753,6 @@ static const struct net_device_ops smc911x_netdev_ops = { .ndo_start_xmit = smc911x_hard_start_xmit, .ndo_tx_timeout = smc911x_timeout, .ndo_set_rx_mode = smc911x_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index d496888b85d3..c8d84679ede7 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -809,7 +809,6 @@ static const struct net_device_ops smc_netdev_ops = { .ndo_start_xmit = smc_wait_to_send_packet, .ndo_tx_timeout = smc_timeout, .ndo_set_rx_mode = smc_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index db3c696d7002..f1c75e291e55 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -294,7 +294,6 @@ static const struct net_device_ops smc_netdev_ops = { .ndo_set_config = s9k_config, .ndo_set_rx_mode = set_rx_mode, .ndo_do_ioctl = smc_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 73212590d04a..65077c77082a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -602,7 +602,8 @@ static void smc_hardware_send_pkt(unsigned long data) SMC_PUSH_DATA(lp, buf, len & ~1); /* Send final ctl word with the last byte if there is one */ - SMC_outw(((len & 1) ? (0x2000 | buf[len-1]) : 0), ioaddr, DATA_REG(lp)); + SMC_outw(lp, ((len & 1) ? (0x2000 | buf[len - 1]) : 0), ioaddr, + DATA_REG(lp)); /* * If THROTTLE_TX_PKTS is set, we stop the queue here. This will @@ -1762,7 +1763,6 @@ static const struct net_device_ops smc_netdev_ops = { .ndo_start_xmit = smc_hard_start_xmit, .ndo_tx_timeout = smc_timeout, .ndo_set_rx_mode = smc_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2326,6 +2326,8 @@ static int smc_drv_probe(struct platform_device *pdev) if (!device_property_read_u32(&pdev->dev, "reg-shift", &val)) lp->io_shift = val; + lp->cfg.pxa_u16_align4 = + device_property_read_bool(&pdev->dev, "pxa-u16-align4"); } #endif diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index ea8465467469..08b17adf0a65 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -86,11 +86,11 @@ #define SMC_inl(a, r) readl((a) + (r)) #define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) \ +#define SMC_outw(lp, v, a, r) \ do { \ unsigned int __v = v, __smc_r = r; \ if (SMC_16BIT(lp)) \ - __SMC_outw(__v, a, __smc_r); \ + __SMC_outw(lp, __v, a, __smc_r); \ else if (SMC_8BIT(lp)) \ SMC_outw_b(__v, a, __smc_r); \ else \ @@ -107,10 +107,10 @@ #define SMC_IRQ_FLAGS (-1) /* from resource */ /* We actually can't write halfwords properly if not word aligned */ -static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) +static inline void _SMC_outw_align4(u16 val, void __iomem *ioaddr, int reg, + bool use_align4_workaround) { - if ((machine_is_mainstone() || machine_is_stargate2() || - machine_is_pxa_idp()) && reg & 2) { + if (use_align4_workaround) { unsigned int v = val << 16; v |= readl(ioaddr + (reg & ~2)) & 0xffff; writel(v, ioaddr + (reg & ~2)); @@ -119,6 +119,12 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) } } +#define __SMC_outw(lp, v, a, r) \ + _SMC_outw_align4((v), (a), (r), \ + IS_BUILTIN(CONFIG_ARCH_PXA) && ((r) & 2) && \ + (lp)->cfg.pxa_u16_align4) + + #elif defined(CONFIG_SH_SH4202_MICRODEV) #define SMC_CAN_USE_8BIT 0 @@ -129,7 +135,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_inw(a, r) inw((a) + (r) - 0xa0000000) #define SMC_inl(a, r) inl((a) + (r) - 0xa0000000) #define SMC_outb(v, a, r) outb(v, (a) + (r) - 0xa0000000) -#define SMC_outw(v, a, r) outw(v, (a) + (r) - 0xa0000000) +#define SMC_outw(lp, v, a, r) outw(v, (a) + (r) - 0xa0000000) #define SMC_outl(v, a, r) outl(v, (a) + (r) - 0xa0000000) #define SMC_insl(a, r, p, l) insl((a) + (r) - 0xa0000000, p, l) #define SMC_outsl(a, r, p, l) outsl((a) + (r) - 0xa0000000, p, l) @@ -147,7 +153,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_inb(a, r) inb(((u32)a) + (r)) #define SMC_inw(a, r) inw(((u32)a) + (r)) #define SMC_outb(v, a, r) outb(v, ((u32)a) + (r)) -#define SMC_outw(v, a, r) outw(v, ((u32)a) + (r)) +#define SMC_outw(lp, v, a, r) outw(v, ((u32)a) + (r)) #define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l) #define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l) @@ -175,7 +181,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_inw(a, r) readw((a) + (r)) #define SMC_inl(a, r) readl((a) + (r)) #define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) +#define SMC_outw(lp, v, a, r) writew(v, (a) + (r)) #define SMC_outl(v, a, r) writel(v, (a) + (r)) #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) @@ -207,7 +213,7 @@ static inline void mcf_outsw(void *a, unsigned char *p, int l) } #define SMC_inw(a, r) _swapw(readw((a) + (r))) -#define SMC_outw(v, a, r) writew(_swapw(v), (a) + (r)) +#define SMC_outw(lp, v, a, r) writew(_swapw(v), (a) + (r)) #define SMC_insw(a, r, p, l) mcf_insw(a + r, p, l) #define SMC_outsw(a, r, p, l) mcf_outsw(a + r, p, l) @@ -241,7 +247,7 @@ static inline void mcf_outsw(void *a, unsigned char *p, int l) #define SMC_inw(a, r) ioread16((a) + (r)) #define SMC_inl(a, r) ioread32((a) + (r)) #define SMC_outb(v, a, r) iowrite8(v, (a) + (r)) -#define SMC_outw(v, a, r) iowrite16(v, (a) + (r)) +#define SMC_outw(lp, v, a, r) iowrite16(v, (a) + (r)) #define SMC_outl(v, a, r) iowrite32(v, (a) + (r)) #define SMC_insw(a, r, p, l) ioread16_rep((a) + (r), p, l) #define SMC_outsw(a, r, p, l) iowrite16_rep((a) + (r), p, l) @@ -303,6 +309,8 @@ struct smc_local { /* the low address lines on some platforms aren't connected... */ int io_shift; + /* on some platforms a u16 write must be 4-bytes aligned */ + bool half_word_align4; struct smc91x_platdata cfg; }; @@ -457,7 +465,7 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma, #if ! SMC_CAN_USE_16BIT -#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) +#define SMC_outw(lp, x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) #define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg) #define SMC_insw(a, r, p, l) BUG() #define SMC_outsw(a, r, p, l) BUG() @@ -909,7 +917,7 @@ static const char * chip_ids[ 16 ] = { else if (SMC_8BIT(lp)) \ SMC_outb(x, ioaddr, PN_REG(lp)); \ else \ - SMC_outw(x, ioaddr, PN_REG(lp)); \ + SMC_outw(lp, x, ioaddr, PN_REG(lp)); \ } while (0) #define SMC_GET_AR(lp) \ @@ -937,7 +945,7 @@ static const char * chip_ids[ 16 ] = { int __mask; \ local_irq_save(__flags); \ __mask = SMC_inw(ioaddr, INT_REG(lp)) & ~0xff; \ - SMC_outw(__mask | (x), ioaddr, INT_REG(lp)); \ + SMC_outw(lp, __mask | (x), ioaddr, INT_REG(lp)); \ local_irq_restore(__flags); \ } \ } while (0) @@ -951,7 +959,7 @@ static const char * chip_ids[ 16 ] = { if (SMC_8BIT(lp)) \ SMC_outb(x, ioaddr, IM_REG(lp)); \ else \ - SMC_outw((x) << 8, ioaddr, INT_REG(lp)); \ + SMC_outw(lp, (x) << 8, ioaddr, INT_REG(lp)); \ } while (0) #define SMC_CURRENT_BANK(lp) SMC_inw(ioaddr, BANK_SELECT) @@ -961,22 +969,22 @@ static const char * chip_ids[ 16 ] = { if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, 12<<SMC_IO_SHIFT); \ else \ - SMC_outw(x, ioaddr, BANK_SELECT); \ + SMC_outw(lp, x, ioaddr, BANK_SELECT); \ } while (0) #define SMC_GET_BASE(lp) SMC_inw(ioaddr, BASE_REG(lp)) -#define SMC_SET_BASE(lp, x) SMC_outw(x, ioaddr, BASE_REG(lp)) +#define SMC_SET_BASE(lp, x) SMC_outw(lp, x, ioaddr, BASE_REG(lp)) #define SMC_GET_CONFIG(lp) SMC_inw(ioaddr, CONFIG_REG(lp)) -#define SMC_SET_CONFIG(lp, x) SMC_outw(x, ioaddr, CONFIG_REG(lp)) +#define SMC_SET_CONFIG(lp, x) SMC_outw(lp, x, ioaddr, CONFIG_REG(lp)) #define SMC_GET_COUNTER(lp) SMC_inw(ioaddr, COUNTER_REG(lp)) #define SMC_GET_CTL(lp) SMC_inw(ioaddr, CTL_REG(lp)) -#define SMC_SET_CTL(lp, x) SMC_outw(x, ioaddr, CTL_REG(lp)) +#define SMC_SET_CTL(lp, x) SMC_outw(lp, x, ioaddr, CTL_REG(lp)) #define SMC_GET_MII(lp) SMC_inw(ioaddr, MII_REG(lp)) @@ -987,20 +995,20 @@ static const char * chip_ids[ 16 ] = { if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 8, 1)); \ else \ - SMC_outw(x, ioaddr, GP_REG(lp)); \ + SMC_outw(lp, x, ioaddr, GP_REG(lp)); \ } while (0) -#define SMC_SET_MII(lp, x) SMC_outw(x, ioaddr, MII_REG(lp)) +#define SMC_SET_MII(lp, x) SMC_outw(lp, x, ioaddr, MII_REG(lp)) #define SMC_GET_MIR(lp) SMC_inw(ioaddr, MIR_REG(lp)) -#define SMC_SET_MIR(lp, x) SMC_outw(x, ioaddr, MIR_REG(lp)) +#define SMC_SET_MIR(lp, x) SMC_outw(lp, x, ioaddr, MIR_REG(lp)) #define SMC_GET_MMU_CMD(lp) SMC_inw(ioaddr, MMU_CMD_REG(lp)) -#define SMC_SET_MMU_CMD(lp, x) SMC_outw(x, ioaddr, MMU_CMD_REG(lp)) +#define SMC_SET_MMU_CMD(lp, x) SMC_outw(lp, x, ioaddr, MMU_CMD_REG(lp)) -#define SMC_GET_FIFO(lp) SMC_inw(ioaddr, FIFO_REG(lp)) +#define SMC_GET_FIFO(lp) SMC_inw(ioaddr, FIFO_REG(lp)) #define SMC_GET_PTR(lp) SMC_inw(ioaddr, PTR_REG(lp)) @@ -1009,14 +1017,14 @@ static const char * chip_ids[ 16 ] = { if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 4, 2)); \ else \ - SMC_outw(x, ioaddr, PTR_REG(lp)); \ + SMC_outw(lp, x, ioaddr, PTR_REG(lp)); \ } while (0) #define SMC_GET_EPH_STATUS(lp) SMC_inw(ioaddr, EPH_STATUS_REG(lp)) #define SMC_GET_RCR(lp) SMC_inw(ioaddr, RCR_REG(lp)) -#define SMC_SET_RCR(lp, x) SMC_outw(x, ioaddr, RCR_REG(lp)) +#define SMC_SET_RCR(lp, x) SMC_outw(lp, x, ioaddr, RCR_REG(lp)) #define SMC_GET_REV(lp) SMC_inw(ioaddr, REV_REG(lp)) @@ -1027,12 +1035,12 @@ static const char * chip_ids[ 16 ] = { if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 8, 0)); \ else \ - SMC_outw(x, ioaddr, RPC_REG(lp)); \ + SMC_outw(lp, x, ioaddr, RPC_REG(lp)); \ } while (0) #define SMC_GET_TCR(lp) SMC_inw(ioaddr, TCR_REG(lp)) -#define SMC_SET_TCR(lp, x) SMC_outw(x, ioaddr, TCR_REG(lp)) +#define SMC_SET_TCR(lp, x) SMC_outw(lp, x, ioaddr, TCR_REG(lp)) #ifndef SMC_GET_MAC_ADDR #define SMC_GET_MAC_ADDR(lp, addr) \ @@ -1049,18 +1057,18 @@ static const char * chip_ids[ 16 ] = { #define SMC_SET_MAC_ADDR(lp, addr) \ do { \ - SMC_outw(addr[0]|(addr[1] << 8), ioaddr, ADDR0_REG(lp)); \ - SMC_outw(addr[2]|(addr[3] << 8), ioaddr, ADDR1_REG(lp)); \ - SMC_outw(addr[4]|(addr[5] << 8), ioaddr, ADDR2_REG(lp)); \ + SMC_outw(lp, addr[0] | (addr[1] << 8), ioaddr, ADDR0_REG(lp)); \ + SMC_outw(lp, addr[2] | (addr[3] << 8), ioaddr, ADDR1_REG(lp)); \ + SMC_outw(lp, addr[4] | (addr[5] << 8), ioaddr, ADDR2_REG(lp)); \ } while (0) #define SMC_SET_MCAST(lp, x) \ do { \ const unsigned char *mt = (x); \ - SMC_outw(mt[0] | (mt[1] << 8), ioaddr, MCAST_REG1(lp)); \ - SMC_outw(mt[2] | (mt[3] << 8), ioaddr, MCAST_REG2(lp)); \ - SMC_outw(mt[4] | (mt[5] << 8), ioaddr, MCAST_REG3(lp)); \ - SMC_outw(mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4(lp)); \ + SMC_outw(lp, mt[0] | (mt[1] << 8), ioaddr, MCAST_REG1(lp)); \ + SMC_outw(lp, mt[2] | (mt[3] << 8), ioaddr, MCAST_REG2(lp)); \ + SMC_outw(lp, mt[4] | (mt[5] << 8), ioaddr, MCAST_REG3(lp)); \ + SMC_outw(lp, mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4(lp)); \ } while (0) #define SMC_PUT_PKT_HDR(lp, status, length) \ @@ -1069,8 +1077,8 @@ static const char * chip_ids[ 16 ] = { SMC_outl((status) | (length)<<16, ioaddr, \ DATA_REG(lp)); \ else { \ - SMC_outw(status, ioaddr, DATA_REG(lp)); \ - SMC_outw(length, ioaddr, DATA_REG(lp)); \ + SMC_outw(lp, status, ioaddr, DATA_REG(lp)); \ + SMC_outw(lp, length, ioaddr, DATA_REG(lp)); \ } \ } while (0) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index e9b8579e6241..cdb343f0c6e0 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2152,7 +2152,6 @@ static const struct net_device_ops smsc911x_netdev_ops = { .ndo_get_stats = smsc911x_get_stats, .ndo_set_rx_mode = smsc911x_set_multicast_list, .ndo_do_ioctl = smsc911x_do_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = smsc911x_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 3818c5e06eba..6e9fcc345798 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -69,6 +69,17 @@ config DWMAC_MESON the stmmac device driver. This driver is used for Meson6, Meson8, Meson8b and GXBB SoCs. +config DWMAC_OXNAS + tristate "Oxford Semiconductor OXNAS dwmac support" + default ARCH_OXNAS + depends on OF && COMMON_CLK && (ARCH_OXNAS || COMPILE_TEST) + select MFD_SYSCON + help + Support for Ethernet controller on Oxford Semiconductor OXNAS SoCs. + + This selects the Oxford Semiconductor OXNASSoC glue layer support for + the stmmac device driver. This driver is used for OX820. + config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 5d6ece5919b3..8f83a86ba13c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o +obj-$(CONFIG_DWMAC_OXNAS) += dwmac-oxnas.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c new file mode 100644 index 000000000000..c35597586121 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -0,0 +1,217 @@ +/* + * Oxford Semiconductor OXNAS DWMAC glue layer + * + * Copyright (C) 2016 Neil Armstrong <narmstrong@baylibre.com> + * Copyright (C) 2014 Daniel Golle <daniel@makrotopia.org> + * Copyright (C) 2013 Ma Haijun <mahaijuns@gmail.com> + * Copyright (C) 2012 John Crispin <blogic@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/stmmac.h> + +#include "stmmac_platform.h" + +/* System Control regmap offsets */ +#define OXNAS_DWMAC_CTRL_REGOFFSET 0x78 +#define OXNAS_DWMAC_DELAY_REGOFFSET 0x100 + +/* Control Register */ +#define DWMAC_CKEN_RX_IN 14 +#define DWMAC_CKEN_RXN_OUT 13 +#define DWMAC_CKEN_RX_OUT 12 +#define DWMAC_CKEN_TX_IN 10 +#define DWMAC_CKEN_TXN_OUT 9 +#define DWMAC_CKEN_TX_OUT 8 +#define DWMAC_RX_SOURCE 7 +#define DWMAC_TX_SOURCE 6 +#define DWMAC_LOW_TX_SOURCE 4 +#define DWMAC_AUTO_TX_SOURCE 3 +#define DWMAC_RGMII 2 +#define DWMAC_SIMPLE_MUX 1 +#define DWMAC_CKEN_GTX 0 + +/* Delay register */ +#define DWMAC_TX_VARDELAY_SHIFT 0 +#define DWMAC_TXN_VARDELAY_SHIFT 8 +#define DWMAC_RX_VARDELAY_SHIFT 16 +#define DWMAC_RXN_VARDELAY_SHIFT 24 +#define DWMAC_TX_VARDELAY(d) ((d) << DWMAC_TX_VARDELAY_SHIFT) +#define DWMAC_TXN_VARDELAY(d) ((d) << DWMAC_TXN_VARDELAY_SHIFT) +#define DWMAC_RX_VARDELAY(d) ((d) << DWMAC_RX_VARDELAY_SHIFT) +#define DWMAC_RXN_VARDELAY(d) ((d) << DWMAC_RXN_VARDELAY_SHIFT) + +struct oxnas_dwmac { + struct device *dev; + struct clk *clk; + struct regmap *regmap; +}; + +static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) +{ + unsigned int value; + int ret; + + /* Reset HW here before changing the glue configuration */ + ret = device_reset(dwmac->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk); + if (ret) + return ret; + + ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value); + if (ret < 0) { + clk_disable_unprepare(dwmac->clk); + return ret; + } + + /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */ + value |= BIT(DWMAC_CKEN_GTX) | + /* Use simple mux for 25/125 Mhz clock switching */ + BIT(DWMAC_SIMPLE_MUX) | + /* set auto switch tx clock source */ + BIT(DWMAC_AUTO_TX_SOURCE) | + /* enable tx & rx vardelay */ + BIT(DWMAC_CKEN_TX_OUT) | + BIT(DWMAC_CKEN_TXN_OUT) | + BIT(DWMAC_CKEN_TX_IN) | + BIT(DWMAC_CKEN_RX_OUT) | + BIT(DWMAC_CKEN_RXN_OUT) | + BIT(DWMAC_CKEN_RX_IN); + regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value); + + /* set tx & rx vardelay */ + value = DWMAC_TX_VARDELAY(4) | + DWMAC_TXN_VARDELAY(2) | + DWMAC_RX_VARDELAY(10) | + DWMAC_RXN_VARDELAY(8); + regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value); + + return 0; +} + +static int oxnas_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device_node *sysctrl; + struct oxnas_dwmac *dwmac; + int ret; + + sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0); + if (!sysctrl) { + dev_err(&pdev->dev, "failed to get sys-ctrl node\n"); + return -EINVAL; + } + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + dwmac->dev = &pdev->dev; + plat_dat->bsp_priv = dwmac; + + dwmac->regmap = syscon_node_to_regmap(sysctrl); + if (IS_ERR(dwmac->regmap)) { + dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); + return PTR_ERR(dwmac->regmap); + } + + dwmac->clk = devm_clk_get(&pdev->dev, "gmac"); + if (IS_ERR(dwmac->clk)) + return PTR_ERR(dwmac->clk); + + ret = oxnas_dwmac_init(dwmac); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int oxnas_dwmac_remove(struct platform_device *pdev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int oxnas_dwmac_suspend(struct device *dev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret; + + ret = stmmac_suspend(dev); + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int oxnas_dwmac_resume(struct device *dev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret; + + ret = oxnas_dwmac_init(dwmac); + if (ret) + return ret; + + ret = stmmac_resume(dev); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops, + oxnas_dwmac_suspend, oxnas_dwmac_resume); + +static const struct of_device_id oxnas_dwmac_match[] = { + { .compatible = "oxsemi,ox820-dwmac" }, + { } +}; +MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); + +static struct platform_driver oxnas_dwmac_driver = { + .probe = oxnas_dwmac_probe, + .remove = oxnas_dwmac_remove, + .driver = { + .name = "oxnas-dwmac", + .pm = &oxnas_dwmac_pm_ops, + .of_match_table = oxnas_dwmac_match, + }, +}; +module_platform_driver(oxnas_dwmac_driver); + +MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>"); +MODULE_DESCRIPTION("Oxford Semiconductor OXNAS DWMAC glue layer"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 3740a4417fa0..6b787d73b32a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -901,44 +901,6 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac) gmac_clk_enable(gmac, false); } -static int rk_gmac_init(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - return rk_gmac_powerup(bsp_priv); -} - -static void rk_gmac_exit(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - rk_gmac_powerdown(bsp_priv); -} - -static void rk_gmac_suspend(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - /* Keep the PHY up if we use Wake-on-Lan. */ - if (device_may_wakeup(&pdev->dev)) - return; - - rk_gmac_powerdown(bsp_priv); - bsp_priv->suspended = true; -} - -static void rk_gmac_resume(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - /* The PHY was up for Wake-on-Lan. */ - if (!bsp_priv->suspended) - return; - - rk_gmac_powerup(bsp_priv); - bsp_priv->suspended = false; -} - static void rk_fix_speed(void *priv, unsigned int speed) { struct rk_priv_data *bsp_priv = priv; @@ -974,23 +936,60 @@ static int rk_gmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); plat_dat->has_gmac = true; - plat_dat->init = rk_gmac_init; - plat_dat->exit = rk_gmac_exit; plat_dat->fix_mac_speed = rk_fix_speed; - plat_dat->suspend = rk_gmac_suspend; - plat_dat->resume = rk_gmac_resume; plat_dat->bsp_priv = rk_gmac_setup(pdev, data); if (IS_ERR(plat_dat->bsp_priv)) return PTR_ERR(plat_dat->bsp_priv); - ret = rk_gmac_init(pdev, plat_dat->bsp_priv); + ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) return ret; return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } +static int rk_gmac_remove(struct platform_device *pdev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + rk_gmac_powerdown(bsp_priv); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int rk_gmac_suspend(struct device *dev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); + int ret = stmmac_suspend(dev); + + /* Keep the PHY up if we use Wake-on-Lan. */ + if (!device_may_wakeup(dev)) { + rk_gmac_powerdown(bsp_priv); + bsp_priv->suspended = true; + } + + return ret; +} + +static int rk_gmac_resume(struct device *dev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); + + /* The PHY was up for Wake-on-Lan. */ + if (bsp_priv->suspended) { + rk_gmac_powerup(bsp_priv); + bsp_priv->suspended = false; + } + + return stmmac_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); + static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, @@ -1003,10 +1002,10 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); static struct platform_driver rk_gmac_dwmac_driver = { .probe = rk_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", - .pm = &stmmac_pltfr_pm_ops, + .pm = &rk_gmac_pm_ops, .of_match_table = rk_gmac_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index bec6963ac71e..5ad1dfb40f2b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -367,8 +367,8 @@ static int socfpga_dwmac_resume(struct device *dev) * control register 0, and can be modified by the phy driver * framework. */ - if (priv->phydev) - phy_resume(priv->phydev); + if (ndev->phydev) + phy_resume(ndev->phydev); return stmmac_resume(dev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 58c05acc2aab..c9006ab083d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -126,8 +126,8 @@ struct sti_dwmac { struct clk *clk; /* PHY clock */ u32 ctrl_reg; /* GMAC glue-logic control register */ int clk_sel_reg; /* GMAC ext clk selection register */ - struct device *dev; struct regmap *regmap; + bool gmac_en; u32 speed; void (*fix_retime_src)(void *priv, unsigned int speed); }; @@ -191,7 +191,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd) } } - if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk && freq) + if (src == TX_RETIME_SRC_CLKGEN && freq) clk_set_rate(dwmac->clk, freq); regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK, @@ -222,26 +222,20 @@ static void stid127_fix_retime_src(void *priv, u32 spd) freq = DWMAC_2_5MHZ; } - if (dwmac->clk && freq) + if (freq) clk_set_rate(dwmac->clk, freq); regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val); } -static int sti_dwmac_init(struct platform_device *pdev, void *priv) +static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) { - struct sti_dwmac *dwmac = priv; struct regmap *regmap = dwmac->regmap; int iface = dwmac->interface; - struct device *dev = dwmac->dev; - struct device_node *np = dev->of_node; u32 reg = dwmac->ctrl_reg; u32 val; - if (dwmac->clk) - clk_prepare_enable(dwmac->clk); - - if (of_property_read_bool(np, "st,gmac_en")) + if (dwmac->gmac_en) regmap_update_bits(regmap, reg, EN_MASK, EN); regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); @@ -249,18 +243,11 @@ static int sti_dwmac_init(struct platform_device *pdev, void *priv) val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; regmap_update_bits(regmap, reg, ENMII_MASK, val); - dwmac->fix_retime_src(priv, dwmac->speed); + dwmac->fix_retime_src(dwmac, dwmac->speed); return 0; } -static void sti_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct sti_dwmac *dwmac = priv; - - if (dwmac->clk) - clk_disable_unprepare(dwmac->clk); -} static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, struct platform_device *pdev) { @@ -270,9 +257,6 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, struct regmap *regmap; int err; - if (!np) - return -EINVAL; - /* clk selection from extra syscfg register */ dwmac->clk_sel_reg = -ENXIO; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-clkconf"); @@ -289,9 +273,9 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, return err; } - dwmac->dev = dev; dwmac->interface = of_get_phy_mode(np); dwmac->regmap = regmap; + dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en"); dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk"); dwmac->tx_retime_src = TX_RETIME_SRC_NA; dwmac->speed = SPEED_100; @@ -357,17 +341,62 @@ static int sti_dwmac_probe(struct platform_device *pdev) dwmac->fix_retime_src = data->fix_retime_src; plat_dat->bsp_priv = dwmac; - plat_dat->init = sti_dwmac_init; - plat_dat->exit = sti_dwmac_exit; plat_dat->fix_mac_speed = data->fix_retime_src; - ret = sti_dwmac_init(pdev, plat_dat->bsp_priv); + ret = clk_prepare_enable(dwmac->clk); if (ret) return ret; - return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = sti_dwmac_set_mode(dwmac); + if (ret) + goto disable_clk; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + goto disable_clk; + + return 0; + +disable_clk: + clk_disable_unprepare(dwmac->clk); + return ret; } +static int sti_dwmac_remove(struct platform_device *pdev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int sti_dwmac_suspend(struct device *dev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret = stmmac_suspend(dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int sti_dwmac_resume(struct device *dev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); + + clk_prepare_enable(dwmac->clk); + sti_dwmac_set_mode(dwmac); + + return stmmac_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend, + sti_dwmac_resume); + static const struct sti_dwmac_of_data stih4xx_dwmac_data = { .fix_retime_src = stih4xx_fix_retime_src, }; @@ -387,10 +416,10 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match); static struct platform_driver sti_dwmac_driver = { .probe = sti_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = sti_dwmac_remove, .driver = { .name = "sti-dwmac", - .pm = &stmmac_pltfr_pm_ops, + .pm = &sti_dwmac_pm_ops, .of_match_table = sti_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b15fc55f1b96..758b4e2c783c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -90,7 +90,6 @@ struct stmmac_priv { struct mac_device_info *hw; spinlock_t lock; - struct phy_device *phydev ____cacheline_aligned_in_smp; int oldlink; int speed; int oldduplex; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 1e06173fc9d7..3fe9340b748f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -269,25 +269,26 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev, strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); } -static int stmmac_ethtool_getsettings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = priv->phydev; + struct phy_device *phy = dev->phydev; int rc; if (priv->hw->pcs & STMMAC_PCS_RGMII || priv->hw->pcs & STMMAC_PCS_SGMII) { struct rgmii_adv adv; + u32 supported, advertising, lp_advertising; if (!priv->xstats.pcs_link) { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; return 0; } - cmd->duplex = priv->xstats.pcs_duplex; + cmd->base.duplex = priv->xstats.pcs_duplex; - ethtool_cmd_speed_set(cmd, priv->xstats.pcs_speed); + cmd->base.speed = priv->xstats.pcs_speed; /* Get and convert ADV/LP_ADV from the HW AN registers */ if (!priv->hw->mac->pcs_get_adv_lp) @@ -297,45 +298,59 @@ static int stmmac_ethtool_getsettings(struct net_device *dev, /* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */ + ethtool_convert_link_mode_to_legacy_u32( + &supported, cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32( + &advertising, cmd->link_modes.advertising); + ethtool_convert_link_mode_to_legacy_u32( + &lp_advertising, cmd->link_modes.lp_advertising); + if (adv.pause & STMMAC_PCS_PAUSE) - cmd->advertising |= ADVERTISED_Pause; + advertising |= ADVERTISED_Pause; if (adv.pause & STMMAC_PCS_ASYM_PAUSE) - cmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; if (adv.lp_pause & STMMAC_PCS_PAUSE) - cmd->lp_advertising |= ADVERTISED_Pause; + lp_advertising |= ADVERTISED_Pause; if (adv.lp_pause & STMMAC_PCS_ASYM_PAUSE) - cmd->lp_advertising |= ADVERTISED_Asym_Pause; + lp_advertising |= ADVERTISED_Asym_Pause; /* Reg49[3] always set because ANE is always supported */ - cmd->autoneg = ADVERTISED_Autoneg; - cmd->supported |= SUPPORTED_Autoneg; - cmd->advertising |= ADVERTISED_Autoneg; - cmd->lp_advertising |= ADVERTISED_Autoneg; + cmd->base.autoneg = ADVERTISED_Autoneg; + supported |= SUPPORTED_Autoneg; + advertising |= ADVERTISED_Autoneg; + lp_advertising |= ADVERTISED_Autoneg; if (adv.duplex) { - cmd->supported |= (SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Full); - cmd->advertising |= (ADVERTISED_1000baseT_Full | - ADVERTISED_100baseT_Full | - ADVERTISED_10baseT_Full); + supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Full); + advertising |= (ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Full); } else { - cmd->supported |= (SUPPORTED_1000baseT_Half | - SUPPORTED_100baseT_Half | - SUPPORTED_10baseT_Half); - cmd->advertising |= (ADVERTISED_1000baseT_Half | - ADVERTISED_100baseT_Half | - ADVERTISED_10baseT_Half); + supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_100baseT_Half | + SUPPORTED_10baseT_Half); + advertising |= (ADVERTISED_1000baseT_Half | + ADVERTISED_100baseT_Half | + ADVERTISED_10baseT_Half); } if (adv.lp_duplex) - cmd->lp_advertising |= (ADVERTISED_1000baseT_Full | - ADVERTISED_100baseT_Full | - ADVERTISED_10baseT_Full); + lp_advertising |= (ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Full); else - cmd->lp_advertising |= (ADVERTISED_1000baseT_Half | - ADVERTISED_100baseT_Half | - ADVERTISED_10baseT_Half); - cmd->port = PORT_OTHER; + lp_advertising |= (ADVERTISED_1000baseT_Half | + ADVERTISED_100baseT_Half | + ADVERTISED_10baseT_Half); + cmd->base.port = PORT_OTHER; + + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.lp_advertising, lp_advertising); return 0; } @@ -350,16 +365,16 @@ static int stmmac_ethtool_getsettings(struct net_device *dev, "link speed / duplex setting\n", dev->name); return -EBUSY; } - cmd->transceiver = XCVR_INTERNAL; - rc = phy_ethtool_gset(phy, cmd); + rc = phy_ethtool_ksettings_get(phy, cmd); return rc; } -static int stmmac_ethtool_setsettings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int +stmmac_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = priv->phydev; + struct phy_device *phy = dev->phydev; int rc; if (priv->hw->pcs & STMMAC_PCS_RGMII || @@ -367,7 +382,7 @@ static int stmmac_ethtool_setsettings(struct net_device *dev, u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause; /* Only support ANE */ - if (cmd->autoneg != AUTONEG_ENABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE) return -EINVAL; mask &= (ADVERTISED_1000baseT_Half | @@ -389,7 +404,7 @@ static int stmmac_ethtool_setsettings(struct net_device *dev, } spin_lock(&priv->lock); - rc = phy_ethtool_sset(phy, cmd); + rc = phy_ethtool_ksettings_set(phy, cmd); spin_unlock(&priv->lock); return rc; @@ -468,12 +483,12 @@ stmmac_get_pauseparam(struct net_device *netdev, if (!adv_lp.pause) return; } else { - if (!(priv->phydev->supported & SUPPORTED_Pause) || - !(priv->phydev->supported & SUPPORTED_Asym_Pause)) + if (!(netdev->phydev->supported & SUPPORTED_Pause) || + !(netdev->phydev->supported & SUPPORTED_Asym_Pause)) return; } - pause->autoneg = priv->phydev->autoneg; + pause->autoneg = netdev->phydev->autoneg; if (priv->flow_ctrl & FLOW_RX) pause->rx_pause = 1; @@ -487,7 +502,7 @@ stmmac_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct stmmac_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phydev; + struct phy_device *phy = netdev->phydev; int new_pause = FLOW_OFF; if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) { @@ -547,7 +562,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, } } if (priv->eee_enabled) { - int val = phy_get_eee_err(priv->phydev); + int val = phy_get_eee_err(dev->phydev); if (val) priv->xstats.phy_eee_wakeup_error_n = val; } @@ -666,7 +681,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; - return phy_ethtool_get_eee(priv->phydev, edata); + return phy_ethtool_get_eee(dev->phydev, edata); } static int stmmac_ethtool_op_set_eee(struct net_device *dev, @@ -691,7 +706,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, priv->tx_lpi_timer = edata->tx_lpi_timer; } - return phy_ethtool_set_eee(priv->phydev, edata); + return phy_ethtool_set_eee(dev->phydev, edata); } static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) @@ -850,8 +865,6 @@ static int stmmac_set_tunable(struct net_device *dev, static const struct ethtool_ops stmmac_ethtool_ops = { .begin = stmmac_check_if_running, .get_drvinfo = stmmac_ethtool_getdrvinfo, - .get_settings = stmmac_ethtool_getsettings, - .set_settings = stmmac_ethtool_setsettings, .get_msglevel = stmmac_ethtool_getmsglevel, .set_msglevel = stmmac_ethtool_setmsglevel, .get_regs = stmmac_ethtool_gregs, @@ -871,6 +884,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .set_coalesce = stmmac_set_coalesce, .get_tunable = stmmac_get_tunable, .set_tunable = stmmac_set_tunable, + .get_link_ksettings = stmmac_ethtool_get_link_ksettings, + .set_link_ksettings = stmmac_ethtool_set_link_ksettings, }; void stmmac_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e2c94ec4edd0..03dbf8e89c4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -221,7 +221,8 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv) */ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv) { - struct phy_device *phydev = priv->phydev; + struct net_device *ndev = priv->dev; + struct phy_device *phydev = ndev->phydev; if (likely(priv->plat->fix_mac_speed)) priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed); @@ -279,6 +280,7 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) */ bool stmmac_eee_init(struct stmmac_priv *priv) { + struct net_device *ndev = priv->dev; unsigned long flags; bool ret = false; @@ -295,7 +297,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) int tx_lpi_timer = priv->tx_lpi_timer; /* Check if the PHY supports EEE */ - if (phy_init_eee(priv->phydev, 1)) { + if (phy_init_eee(ndev->phydev, 1)) { /* To manage at run-time if the EEE cannot be supported * anymore (for example because the lp caps have been * changed). @@ -327,7 +329,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) tx_lpi_timer); } /* Set HW EEE according to the speed */ - priv->hw->mac->set_eee_pls(priv->hw, priv->phydev->link); + priv->hw->mac->set_eee_pls(priv->hw, ndev->phydev->link); ret = true; spin_unlock_irqrestore(&priv->lock, flags); @@ -700,7 +702,7 @@ static void stmmac_release_ptp(struct stmmac_priv *priv) static void stmmac_adjust_link(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; unsigned long flags; int new_state = 0; unsigned int fc = priv->flow_ctrl, pause_time = priv->pause; @@ -890,8 +892,6 @@ static int stmmac_init_phy(struct net_device *dev) pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); - priv->phydev = phydev; - return 0; } @@ -1816,8 +1816,8 @@ static int stmmac_open(struct net_device *dev) stmmac_init_tx_coalesce(priv); - if (priv->phydev) - phy_start(priv->phydev); + if (dev->phydev) + phy_start(dev->phydev); /* Request the IRQ lines */ ret = request_irq(dev->irq, stmmac_interrupt, @@ -1864,8 +1864,8 @@ wolirq_error: init_error: free_dma_desc_resources(priv); dma_desc_error: - if (priv->phydev) - phy_disconnect(priv->phydev); + if (dev->phydev) + phy_disconnect(dev->phydev); return ret; } @@ -1884,10 +1884,9 @@ static int stmmac_release(struct net_device *dev) del_timer_sync(&priv->eee_ctrl_timer); /* Stop and disconnect the PHY */ - if (priv->phydev) { - phy_stop(priv->phydev); - phy_disconnect(priv->phydev); - priv->phydev = NULL; + if (dev->phydev) { + phy_stop(dev->phydev); + phy_disconnect(dev->phydev); } netif_stop_queue(dev); @@ -2725,27 +2724,11 @@ static void stmmac_set_rx_mode(struct net_device *dev) */ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { - struct stmmac_priv *priv = netdev_priv(dev); - int max_mtu; - if (netif_running(dev)) { pr_err("%s: must be stopped to change its MTU\n", dev->name); return -EBUSY; } - if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) - max_mtu = JUMBO_LEN; - else - max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN); - - if (priv->plat->maxmtu < max_mtu) - max_mtu = priv->plat->maxmtu; - - if ((new_mtu < 46) || (new_mtu > max_mtu)) { - pr_err("%s: invalid MTU, max MTU is: %d\n", dev->name, max_mtu); - return -EINVAL; - } - dev->mtu = new_mtu; netdev_update_features(dev); @@ -2879,7 +2862,6 @@ static void stmmac_poll_controller(struct net_device *dev) */ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct stmmac_priv *priv = netdev_priv(dev); int ret = -EOPNOTSUPP; if (!netif_running(dev)) @@ -2889,9 +2871,9 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!priv->phydev) + if (!dev->phydev) return -EINVAL; - ret = phy_mii_ioctl(priv->phydev, rq, cmd); + ret = phy_mii_ioctl(dev->phydev, rq, cmd); break; case SIOCSHWTSTAMP: ret = stmmac_hwtstamp_ioctl(dev, rq); @@ -3328,6 +3310,15 @@ int stmmac_dvr_probe(struct device *device, #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); + /* MTU range: 46 - hw-specific max */ + ndev->min_mtu = ETH_ZLEN - ETH_HLEN; + if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) + ndev->max_mtu = JUMBO_LEN; + else + ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN); + if (priv->plat->maxmtu < ndev->max_mtu) + ndev->max_mtu = priv->plat->maxmtu; + if (flow_ctrl) priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ @@ -3444,8 +3435,8 @@ int stmmac_suspend(struct device *dev) if (!ndev || !netif_running(ndev)) return 0; - if (priv->phydev) - phy_stop(priv->phydev); + if (ndev->phydev) + phy_stop(ndev->phydev); spin_lock_irqsave(&priv->lock, flags); @@ -3539,8 +3530,8 @@ int stmmac_resume(struct device *dev) spin_unlock_irqrestore(&priv->lock, flags); - if (priv->phydev) - phy_start(priv->phydev); + if (ndev->phydev) + phy_start(ndev->phydev); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0a0d6a86f397..4d544c34c1f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -417,9 +417,7 @@ static int stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->suspend) - priv->plat->suspend(pdev, priv->plat->bsp_priv); - else if (priv->plat->exit) + if (priv->plat->exit) priv->plat->exit(pdev, priv->plat->bsp_priv); return ret; @@ -438,9 +436,7 @@ static int stmmac_pltfr_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); - if (priv->plat->resume) - priv->plat->resume(pdev, priv->plat->bsp_priv); - else if (priv->plat->init) + if (priv->plat->init) priv->plat->init(pdev, priv->plat->bsp_priv); return stmmac_resume(dev); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 062bce9acde6..e9e5ef241c6f 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3863,9 +3863,6 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu) { struct cas *cp = netdev_priv(dev); - if (new_mtu < CAS_MIN_MTU || new_mtu > CAS_MAX_MTU) - return -EINVAL; - dev->mtu = new_mtu; if (!netif_running(dev) || !netif_device_present(dev)) return 0; @@ -5115,6 +5112,10 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; + /* MTU range: 60 - varies or 9000 */ + dev->min_mtu = CAS_MIN_MTU; + dev->max_mtu = CAS_MAX_MTU; + if (register_netdev(dev)) { dev_err(&pdev->dev, "Cannot register net device, aborting\n"); goto err_out_free_consistent; diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 0ac449acaf5b..335b87660638 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -139,7 +139,6 @@ static const struct net_device_ops vsw_ops = { .ndo_set_mac_address = sunvnet_set_mac_addr_common, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = sunvnet_tx_timeout_common, - .ndo_change_mtu = sunvnet_change_mtu_common, .ndo_start_xmit = vsw_start_xmit, .ndo_select_queue = vsw_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -239,6 +238,10 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; + /* MTU range: 68 - 65535 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = VNET_MAX_MTU; + SET_NETDEV_DEV(dev, &vdev->dev); return dev; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index a2371aa14a49..f90d1af6d390 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6754,9 +6754,6 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) struct niu *np = netdev_priv(dev); int err, orig_jumbo, new_jumbo; - if (new_mtu < 68 || new_mtu > NIU_MAX_MTU) - return -EINVAL; - orig_jumbo = (dev->mtu > ETH_DATA_LEN); new_jumbo = (new_mtu > ETH_DATA_LEN); @@ -9823,6 +9820,10 @@ static int niu_pci_init_one(struct pci_dev *pdev, dev->irq = pdev->irq; + /* MTU range: 68 - 9216 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = NIU_MAX_MTU; + niu_assign_netdev_ops(dev); err = niu_get_invariants(np); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index aa4f9d2d8fa9..ea89ef3b48fb 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -1064,7 +1064,6 @@ static const struct net_device_ops bigmac_ops = { .ndo_get_stats = bigmac_get_stats, .ndo_set_rx_mode = bigmac_set_multicast, .ndo_tx_timeout = bigmac_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index d6ad0fbd054e..66ecf0fcc330 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2476,9 +2476,9 @@ static void gem_set_multicast(struct net_device *dev) } /* Jumbo-grams don't seem to work :-( */ -#define GEM_MIN_MTU 68 +#define GEM_MIN_MTU ETH_MIN_MTU #if 1 -#define GEM_MAX_MTU 1500 +#define GEM_MAX_MTU ETH_DATA_LEN #else #define GEM_MAX_MTU 9000 #endif @@ -2487,9 +2487,6 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu) { struct gem *gp = netdev_priv(dev); - if (new_mtu < GEM_MIN_MTU || new_mtu > GEM_MAX_MTU) - return -EINVAL; - dev->mtu = new_mtu; /* We'll just catch it later when the device is up'd or resumed */ @@ -2977,6 +2974,10 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; + /* MTU range: 68 - 1500 (Jumbo mode is broken) */ + dev->min_mtu = GEM_MIN_MTU; + dev->max_mtu = GEM_MAX_MTU; + /* Register with kernel */ if (register_netdev(dev)) { pr_err("Cannot register net device, aborting\n"); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index cf4dcff051d5..ca96408058b0 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2669,7 +2669,6 @@ static const struct net_device_ops hme_netdev_ops = { .ndo_tx_timeout = happy_meal_tx_timeout, .ndo_get_stats = happy_meal_get_stats, .ndo_set_rx_mode = happy_meal_set_multicast, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 9b825780b3be..c5ef711f6567 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -823,7 +823,6 @@ static const struct net_device_ops qec_ops = { .ndo_start_xmit = qe_start_xmit, .ndo_set_rx_mode = qe_set_multicast, .ndo_tx_timeout = qe_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index a2f9b47de187..5356a7074796 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -159,7 +159,6 @@ static const struct net_device_ops vnet_ops = { .ndo_set_mac_address = sunvnet_set_mac_addr_common, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = sunvnet_tx_timeout_common, - .ndo_change_mtu = sunvnet_change_mtu_common, .ndo_start_xmit = vnet_start_xmit, .ndo_select_queue = vnet_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -202,6 +201,10 @@ static struct vnet *vnet_new(const u64 *local_mac, NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; + /* MTU range: 68 - 65535 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = VNET_MAX_MTU; + SET_NETDEV_DEV(dev, &vdev->dev); err = register_netdev(dev); diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 904a5a12a85d..8878b75d68b4 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -704,9 +704,8 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf) return 0; } -/* Got back a STOPPED LDC message on port. If the queue is stopped, - * wake it up so that we'll send out another START message at the - * next TX. +/* If the queue is stopped, wake it up so that we'll + * send out another START message at the next TX. */ static void maybe_tx_wakeup(struct vnet_port *port) { @@ -734,6 +733,7 @@ EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common); static int vnet_event_napi(struct vnet_port *port, int budget) { + struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); struct vio_driver_state *vio = &port->vio; int tx_wakeup, err; int npkts = 0; @@ -747,6 +747,16 @@ ldc_ctrl: if (event == LDC_EVENT_RESET) { vnet_port_reset(port); vio_port_up(vio); + + /* If the device is running but its tx queue was + * stopped (due to flow control), restart it. + * This is necessary since vnet_port_reset() + * clears the tx drings and thus we may never get + * back a VIO_TYPE_DATA ACK packet - which is + * the normal mechanism to restart the tx queue. + */ + if (netif_running(dev)) + maybe_tx_wakeup(port); } port->rx_event = 0; return 0; @@ -1583,16 +1593,6 @@ void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp) } EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common); -int sunvnet_change_mtu_common(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > 65535) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL_GPL(sunvnet_change_mtu_common); - int sunvnet_set_mac_addr_common(struct net_device *dev, void *p) { return -EINVAL; diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index bd36528af972..ce5c824128a3 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -15,6 +15,8 @@ #define VNET_MINTSO 2048 /* VIO protocol's minimum TSO len */ #define VNET_MAXTSO 65535 /* VIO protocol's maximum TSO len */ +#define VNET_MAX_MTU 65535 + /* VNET packets are sent in buffers with the first 6 bytes skipped * so that after the ethernet header the IPv4/IPv6 headers are aligned * properly. @@ -125,7 +127,6 @@ int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); void sunvnet_tx_timeout_common(struct net_device *dev); -int sunvnet_change_mtu_common(struct net_device *dev, int new_mtu); int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, struct vnet_port *(*vnet_tx_port) (struct sk_buff *, struct net_device *)); diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 5eedac495077..70533017aaa7 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2213,7 +2213,7 @@ static int dwceqos_start_xmit(struct sk_buff *skb, struct net_device *ndev) tx_error: dwceqos_tx_rollback(lp, &trans); - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return 0; } diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 7108c68f16d3..baa3e4a5731c 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -761,16 +761,6 @@ static int bdx_change_mtu(struct net_device *ndev, int new_mtu) { ENTER; - if (new_mtu == ndev->mtu) - RET(0); - - /* enforce minimum frame size */ - if (new_mtu < ETH_ZLEN) { - netdev_err(ndev, "mtu %d is less then minimal %d\n", - new_mtu, ETH_ZLEN); - RET(-EINVAL); - } - ndev->mtu = new_mtu; if (netif_running(ndev)) { bdx_close(ndev); @@ -2057,6 +2047,10 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef BDX_LLTX ndev->features |= NETIF_F_LLTX; #endif + /* MTU range: 60 - 16384 */ + ndev->min_mtu = ETH_ZLEN; + ndev->max_mtu = BDX_MAX_MTU; + spin_lock_init(&priv->tx_lock); /*bdx_hw_reset(priv); */ diff --git a/drivers/net/ethernet/tehuti/tehuti.h b/drivers/net/ethernet/tehuti/tehuti.h index 709ebd6e28b4..8e7b4c9abf21 100644 --- a/drivers/net/ethernet/tehuti/tehuti.h +++ b/drivers/net/ethernet/tehuti/tehuti.h @@ -74,6 +74,9 @@ * ifcontig eth1 txqueuelen 3000 - to change it at runtime */ #define BDX_NDEV_TXQ_LEN 3000 +/* Max MTU for Jumbo Frame mode, per tehutinetworks.net Features FAQ is 16k */ +#define BDX_MAX_MTU (16 * 1024) + #define FIFO_SIZE 4096 #define FIFO_EXTRA_SPACE 1024 diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index fa0cfda24fd9..c56e7030c44e 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1068,7 +1068,6 @@ static const struct net_device_ops cpmac_netdev_ops = { .ndo_tx_timeout = cpmac_tx_timeout, .ndo_set_rx_mode = cpmac_set_multicast_list, .ndo_do_ioctl = cpmac_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c6cff3d2ff05..39d06e8e6aa7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1376,10 +1376,6 @@ static int cpsw_ndo_open(struct net_device *ndev) ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); if (!cpsw_common_res_usage_state(cpsw)) { - /* setup tx dma to fixed prio and zero offset */ - cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1); - cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0); - /* disable priority elevation */ __raw_writel(0, &cpsw->regs->ptype); @@ -1883,7 +1879,6 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_set_mac_address = cpsw_ndo_set_mac_address, .ndo_do_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1967,27 +1962,30 @@ static int cpsw_get_ts_info(struct net_device *ndev, return 0; } -static int cpsw_get_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) +static int cpsw_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; int slave_no = cpsw_slave_index(cpsw, priv); if (cpsw->slaves[slave_no].phy) - return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd); + return phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy, + ecmd); else return -EOPNOTSUPP; } -static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +static int cpsw_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; int slave_no = cpsw_slave_index(cpsw, priv); if (cpsw->slaves[slave_no].phy) - return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd); + return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy, + ecmd); else return -EOPNOTSUPP; } @@ -2245,8 +2243,6 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .set_msglevel = cpsw_set_msglevel, .get_link = ethtool_op_get_link, .get_ts_info = cpsw_get_ts_info, - .get_settings = cpsw_get_settings, - .set_settings = cpsw_set_settings, .get_coalesce = cpsw_get_coalesce, .set_coalesce = cpsw_set_coalesce, .get_sset_count = cpsw_get_sset_count, @@ -2262,6 +2258,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .complete = cpsw_ethtool_op_complete, .get_channels = cpsw_get_channels, .set_channels = cpsw_set_channels, + .get_link_ksettings = cpsw_get_link_ksettings, + .set_link_ksettings = cpsw_set_link_ksettings, }; static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw, diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index c3f35f11a8fd..56708a79a18a 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -124,6 +124,29 @@ struct cpdma_chan { int int_set, int_clear, td; }; +struct cpdma_control_info { + u32 reg; + u32 shift, mask; + int access; +#define ACCESS_RO BIT(0) +#define ACCESS_WO BIT(1) +#define ACCESS_RW (ACCESS_RO | ACCESS_WO) +}; + +static struct cpdma_control_info controls[] = { + [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, + [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL, 4, 1, ACCESS_RW}, + [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL, 2, 1, ACCESS_RW}, + [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL, 1, 1, ACCESS_RW}, + [CPDMA_TX_PRIO_FIXED] = {CPDMA_DMACONTROL, 0, 1, ACCESS_RW}, + [CPDMA_STAT_IDLE] = {CPDMA_DMASTATUS, 31, 1, ACCESS_RO}, + [CPDMA_STAT_TX_ERR_CODE] = {CPDMA_DMASTATUS, 20, 0xf, ACCESS_RW}, + [CPDMA_STAT_TX_ERR_CHAN] = {CPDMA_DMASTATUS, 16, 0x7, ACCESS_RW}, + [CPDMA_STAT_RX_ERR_CODE] = {CPDMA_DMASTATUS, 12, 0xf, ACCESS_RW}, + [CPDMA_STAT_RX_ERR_CHAN] = {CPDMA_DMASTATUS, 8, 0x7, ACCESS_RW}, + [CPDMA_RX_BUFFER_OFFSET] = {CPDMA_RXBUFFOFS, 0, 0xffff, ACCESS_RW}, +}; + #define tx_chan_num(chan) (chan) #define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) #define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) @@ -253,6 +276,31 @@ static void cpdma_desc_free(struct cpdma_desc_pool *pool, gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size); } +static int _cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) +{ + struct cpdma_control_info *info = &controls[control]; + u32 val; + + if (!ctlr->params.has_ext_regs) + return -ENOTSUPP; + + if (ctlr->state != CPDMA_STATE_ACTIVE) + return -EINVAL; + + if (control < 0 || control >= ARRAY_SIZE(controls)) + return -ENOENT; + + if ((info->access & ACCESS_WO) != ACCESS_WO) + return -EPERM; + + val = dma_reg_read(ctlr, info->reg); + val &= ~(info->mask << info->shift); + val |= (value & info->mask) << info->shift; + dma_reg_write(ctlr, info->reg, val); + + return 0; +} + struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) { struct cpdma_ctlr *ctlr; @@ -324,6 +372,10 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) if (ctlr->channels[i]) cpdma_chan_start(ctlr->channels[i]); } + + _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, 1); + _cpdma_control_set(ctlr, CPDMA_RX_BUFFER_OFFSET, 0); + spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } @@ -335,7 +387,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) int i; spin_lock_irqsave(&ctlr->lock, flags); - if (ctlr->state == CPDMA_STATE_TEARDOWN) { + if (ctlr->state != CPDMA_STATE_ACTIVE) { spin_unlock_irqrestore(&ctlr->lock, flags); return -EINVAL; } @@ -533,7 +585,7 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) cpdma_chan_stop(chan); ctlr->channels[chan->chan_num] = NULL; ctlr->chan_num--; - + devm_kfree(ctlr->dev, chan); cpdma_chan_split_pool(ctlr); spin_unlock_irqrestore(&ctlr->lock, flags); @@ -874,29 +926,6 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable) return 0; } -struct cpdma_control_info { - u32 reg; - u32 shift, mask; - int access; -#define ACCESS_RO BIT(0) -#define ACCESS_WO BIT(1) -#define ACCESS_RW (ACCESS_RO | ACCESS_WO) -}; - -static struct cpdma_control_info controls[] = { - [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, - [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL, 4, 1, ACCESS_RW}, - [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL, 2, 1, ACCESS_RW}, - [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL, 1, 1, ACCESS_RW}, - [CPDMA_TX_PRIO_FIXED] = {CPDMA_DMACONTROL, 0, 1, ACCESS_RW}, - [CPDMA_STAT_IDLE] = {CPDMA_DMASTATUS, 31, 1, ACCESS_RO}, - [CPDMA_STAT_TX_ERR_CODE] = {CPDMA_DMASTATUS, 20, 0xf, ACCESS_RW}, - [CPDMA_STAT_TX_ERR_CHAN] = {CPDMA_DMASTATUS, 16, 0x7, ACCESS_RW}, - [CPDMA_STAT_RX_ERR_CODE] = {CPDMA_DMASTATUS, 12, 0xf, ACCESS_RW}, - [CPDMA_STAT_RX_ERR_CHAN] = {CPDMA_DMASTATUS, 8, 0x7, ACCESS_RW}, - [CPDMA_RX_BUFFER_OFFSET] = {CPDMA_RXBUFFOFS, 0, 0xffff, ACCESS_RW}, -}; - int cpdma_control_get(struct cpdma_ctlr *ctlr, int control) { unsigned long flags; @@ -931,35 +960,10 @@ unlock_ret: int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) { unsigned long flags; - struct cpdma_control_info *info = &controls[control]; int ret; - u32 val; spin_lock_irqsave(&ctlr->lock, flags); - - ret = -ENOTSUPP; - if (!ctlr->params.has_ext_regs) - goto unlock_ret; - - ret = -EINVAL; - if (ctlr->state != CPDMA_STATE_ACTIVE) - goto unlock_ret; - - ret = -ENOENT; - if (control < 0 || control >= ARRAY_SIZE(controls)) - goto unlock_ret; - - ret = -EPERM; - if ((info->access & ACCESS_WO) != ACCESS_WO) - goto unlock_ret; - - val = dma_reg_read(ctlr, info->reg); - val &= ~(info->mask << info->shift); - val |= (value & info->mask) << info->shift; - dma_reg_write(ctlr, info->reg, val); - ret = 0; - -unlock_ret: + ret = _cpdma_control_set(ctlr, control, value); spin_unlock_irqrestore(&ctlr->lock, flags); return ret; } diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 32516661f180..78b4c831f5ad 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1766,21 +1766,6 @@ out: return (ret == 0) ? 0 : err; } -static int netcp_ndo_change_mtu(struct net_device *ndev, int new_mtu) -{ - struct netcp_intf *netcp = netdev_priv(ndev); - - /* MTU < 68 is an error for IPv4 traffic */ - if ((new_mtu < 68) || - (new_mtu > (NETCP_MAX_FRAME_SIZE - ETH_HLEN - ETH_FCS_LEN))) { - dev_err(netcp->ndev_dev, "Invalid mtu size = %d\n", new_mtu); - return -EINVAL; - } - - ndev->mtu = new_mtu; - return 0; -} - static void netcp_ndo_tx_timeout(struct net_device *ndev) { struct netcp_intf *netcp = netdev_priv(ndev); @@ -1886,7 +1871,6 @@ static const struct net_device_ops netcp_netdev_ops = { .ndo_start_xmit = netcp_ndo_start_xmit, .ndo_set_rx_mode = netcp_set_rx_mode, .ndo_do_ioctl = netcp_ndo_ioctl, - .ndo_change_mtu = netcp_ndo_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = netcp_rx_add_vid, @@ -1923,6 +1907,10 @@ static int netcp_create_interface(struct netcp_device *netcp_device, ndev->hw_features = ndev->features; ndev->vlan_features |= NETIF_F_SG; + /* MTU range: 68 - 9486 */ + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = NETCP_MAX_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); + netcp = netdev_priv(ndev); spin_lock_init(&netcp->lock); INIT_LIST_HEAD(&netcp->module_head); @@ -2070,7 +2058,6 @@ static void netcp_delete_interface(struct netcp_device *netcp_device, if (module->release) module->release(intf_modpriv->module_priv); list_del(&intf_modpriv->intf_list); - kfree(intf_modpriv); } WARN(!list_empty(&netcp->module_head), "%s interface module list is not empty!\n", ndev->name); @@ -2133,6 +2120,8 @@ static int netcp_probe(struct platform_device *pdev) } } + of_node_put(interfaces); + /* Add the device instance to the list */ list_add_tail(&netcp_device->device_list, &netcp_devices); @@ -2145,6 +2134,8 @@ probe_quit_interface: netcp_delete_interface(netcp_device, netcp_intf->ndev); } + of_node_put(interfaces); + probe_quit: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); @@ -2165,7 +2156,6 @@ static int netcp_remove(struct platform_device *pdev) dev_dbg(&pdev->dev, "Removing module \"%s\"\n", module->name); module->remove(netcp_device, inst_modpriv->module_priv); list_del(&inst_modpriv->inst_list); - kfree(inst_modpriv); } /* now that all modules are removed, clean up the interfaces */ diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index d543298d6750..48cb04fb7e0c 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1840,8 +1840,8 @@ static void keystone_get_ethtool_stats(struct net_device *ndev, spin_unlock_bh(&gbe_dev->hw_stats_lock); } -static int keystone_get_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int keystone_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct netcp_intf *netcp = netdev_priv(ndev); struct phy_device *phy = ndev->phydev; @@ -1858,20 +1858,28 @@ static int keystone_get_settings(struct net_device *ndev, if (!gbe_intf->slave) return -EINVAL; - ret = phy_ethtool_gset(phy, cmd); + ret = phy_ethtool_ksettings_get(phy, cmd); if (!ret) - cmd->port = gbe_intf->slave->phy_port_t; + cmd->base.port = gbe_intf->slave->phy_port_t; return ret; } -static int keystone_set_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int keystone_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct netcp_intf *netcp = netdev_priv(ndev); struct phy_device *phy = ndev->phydev; struct gbe_intf *gbe_intf; - u32 features = cmd->advertising & cmd->supported; + u8 port = cmd->base.port; + u32 advertising, supported; + u32 features; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + features = advertising & supported; if (!phy) return -EINVAL; @@ -1883,25 +1891,25 @@ static int keystone_set_settings(struct net_device *ndev, if (!gbe_intf->slave) return -EINVAL; - if (cmd->port != gbe_intf->slave->phy_port_t) { - if ((cmd->port == PORT_TP) && !(features & ADVERTISED_TP)) + if (port != gbe_intf->slave->phy_port_t) { + if ((port == PORT_TP) && !(features & ADVERTISED_TP)) return -EINVAL; - if ((cmd->port == PORT_AUI) && !(features & ADVERTISED_AUI)) + if ((port == PORT_AUI) && !(features & ADVERTISED_AUI)) return -EINVAL; - if ((cmd->port == PORT_BNC) && !(features & ADVERTISED_BNC)) + if ((port == PORT_BNC) && !(features & ADVERTISED_BNC)) return -EINVAL; - if ((cmd->port == PORT_MII) && !(features & ADVERTISED_MII)) + if ((port == PORT_MII) && !(features & ADVERTISED_MII)) return -EINVAL; - if ((cmd->port == PORT_FIBRE) && !(features & ADVERTISED_FIBRE)) + if ((port == PORT_FIBRE) && !(features & ADVERTISED_FIBRE)) return -EINVAL; } - gbe_intf->slave->phy_port_t = cmd->port; - return phy_ethtool_sset(phy, cmd); + gbe_intf->slave->phy_port_t = port; + return phy_ethtool_ksettings_set(phy, cmd); } static const struct ethtool_ops keystone_ethtool_ops = { @@ -1912,8 +1920,8 @@ static const struct ethtool_ops keystone_ethtool_ops = { .get_strings = keystone_get_stat_strings, .get_sset_count = keystone_get_sset_count, .get_ethtool_stats = keystone_get_ethtool_stats, - .get_settings = keystone_get_settings, - .set_settings = keystone_set_settings, + .get_link_ksettings = keystone_get_link_ksettings, + .set_link_ksettings = keystone_set_link_ksettings, }; #define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \ diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 6c7ec1ddd475..c8d53d8c83ee 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -772,7 +772,6 @@ static const struct net_device_ops tlan_netdev_ops = { .ndo_get_stats = tlan_get_stats, .ndo_set_rx_mode = tlan_set_multicast_list, .ndo_do_ioctl = tlan_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 11213a38c795..0aaf975bb347 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -59,6 +59,9 @@ /* Maximum number of packets to handle per "poll". */ #define TILE_NET_WEIGHT 64 +/* Maximum Jumbo Packet MTU */ +#define TILE_JUMBO_MAX_MTU 9000 + /* Number of entries in each iqueue. */ #define IQUEUE_ENTRIES 512 @@ -2101,17 +2104,6 @@ static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EOPNOTSUPP; } -/* Change the MTU. */ -static int tile_net_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68) - return -EINVAL; - if (new_mtu > ((jumbo_num != 0) ? 9000 : 1500)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - /* Change the Ethernet address of the NIC. * * The hypervisor driver does not support changing MAC address. However, @@ -2154,7 +2146,6 @@ static const struct net_device_ops tile_net_ops = { .ndo_start_xmit = tile_net_tx, .ndo_select_queue = tile_net_select_queue, .ndo_do_ioctl = tile_net_ioctl, - .ndo_change_mtu = tile_net_change_mtu, .ndo_tx_timeout = tile_net_tx_timeout, .ndo_set_mac_address = tile_net_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2174,7 +2165,11 @@ static void tile_net_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &tile_net_ops; dev->watchdog_timeo = TILE_NET_TIMEOUT; - dev->mtu = 1500; + + /* MTU range: 68 - 1500 or 9000 */ + dev->mtu = ETH_DATA_LEN; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = jumbo_num ? TILE_JUMBO_MAX_MTU : ETH_DATA_LEN; features |= NETIF_F_HW_CSUM; features |= NETIF_F_SG; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 4ef605a90247..0a3b7dafa3ba 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -87,7 +87,7 @@ /* This should be 1500 if "jumbo" is not set in LIPP. */ /* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */ /* ISSUE: This has not been thoroughly tested (except at 1500). */ -#define TILE_NET_MTU 1500 +#define TILE_NET_MTU ETH_DATA_LEN /* HACK: Define this to verify incoming packets. */ /* #define TILE_NET_VERIFY_INGRESS */ @@ -2095,26 +2095,6 @@ static struct rtnl_link_stats64 *tile_net_get_stats64(struct net_device *dev, } -/* - * Change the "mtu". - * - * The "change_mtu" method is usually not needed. - * If you need it, it must be like this. - */ -static int tile_net_change_mtu(struct net_device *dev, int new_mtu) -{ - PDEBUG("tile_net_change_mtu()\n"); - - /* Check ranges. */ - if ((new_mtu < 68) || (new_mtu > 1500)) - return -EINVAL; - - /* Accept the value. */ - dev->mtu = new_mtu; - - return 0; -} - /* * Change the Ethernet Address of the NIC. @@ -2229,7 +2209,6 @@ static const struct net_device_ops tile_net_ops = { .ndo_start_xmit = tile_net_tx, .ndo_do_ioctl = tile_net_ioctl, .ndo_get_stats64 = tile_net_get_stats64, - .ndo_change_mtu = tile_net_change_mtu, .ndo_tx_timeout = tile_net_tx_timeout, .ndo_set_mac_address = tile_net_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2252,7 +2231,11 @@ static void tile_net_setup(struct net_device *dev) dev->netdev_ops = &tile_net_ops; dev->watchdog_timeo = TILE_NET_TIMEOUT; dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN; + + /* MTU range: 68 - 1500 */ dev->mtu = TILE_NET_MTU; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = TILE_NET_MTU; features |= NETIF_F_HW_CSUM; features |= NETIF_F_SG; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 272f2b1cb7ad..345316c749e7 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1114,24 +1114,6 @@ static int gelic_net_poll(struct napi_struct *napi, int budget) } return packets_done; } -/** - * gelic_net_change_mtu - changes the MTU of an interface - * @netdev: interface device structure - * @new_mtu: new MTU value - * - * returns 0 on success, <0 on failure - */ -int gelic_net_change_mtu(struct net_device *netdev, int new_mtu) -{ - /* no need to re-alloc skbs or so -- the max mtu is about 2.3k - * and mtu is outbound only anyway */ - if ((new_mtu < GELIC_NET_MIN_MTU) || - (new_mtu > GELIC_NET_MAX_MTU)) { - return -EINVAL; - } - netdev->mtu = new_mtu; - return 0; -} /** * gelic_card_interrupt - event handler for gelic_net @@ -1446,7 +1428,6 @@ static const struct net_device_ops gelic_netdevice_ops = { .ndo_stop = gelic_net_stop, .ndo_start_xmit = gelic_net_xmit, .ndo_set_rx_mode = gelic_net_set_multi, - .ndo_change_mtu = gelic_net_change_mtu, .ndo_tx_timeout = gelic_net_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -1513,6 +1494,10 @@ int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card) netdev->features |= NETIF_F_VLAN_CHALLENGED; } + /* MTU range: 64 - 1518 */ + netdev->min_mtu = GELIC_NET_MIN_MTU; + netdev->max_mtu = GELIC_NET_MAX_MTU; + status = register_netdev(netdev); if (status) { dev_err(ctodev(card), "%s:Couldn't register %s %d\n", diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 8505196be9f5..003d0452d9cb 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -373,7 +373,6 @@ int gelic_net_stop(struct net_device *netdev); int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); void gelic_net_tx_timeout(struct net_device *netdev); -int gelic_net_change_mtu(struct net_device *netdev, int new_mtu); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); /* shared ethtool ops */ diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 928c1dca2673..eed18f88bdff 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -2558,7 +2558,6 @@ static const struct net_device_ops gelic_wl_netdevice_ops = { .ndo_stop = gelic_wl_stop, .ndo_start_xmit = gelic_net_xmit, .ndo_set_rx_mode = gelic_net_set_multi, - .ndo_change_mtu = gelic_net_change_mtu, .ndo_tx_timeout = gelic_net_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 36a6e8b54d94..cb341dfe65ad 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -1279,25 +1279,6 @@ static int spider_net_poll(struct napi_struct *napi, int budget) } /** - * spider_net_change_mtu - changes the MTU of an interface - * @netdev: interface device structure - * @new_mtu: new MTU value - * - * returns 0 on success, <0 on failure - */ -static int -spider_net_change_mtu(struct net_device *netdev, int new_mtu) -{ - /* no need to re-alloc skbs or so -- the max mtu is about 2.3k - * and mtu is outbound only anyway */ - if ( (new_mtu < SPIDER_NET_MIN_MTU ) || - (new_mtu > SPIDER_NET_MAX_MTU) ) - return -EINVAL; - netdev->mtu = new_mtu; - return 0; -} - -/** * spider_net_set_mac - sets the MAC of an interface * @netdev: interface device structure * @ptr: pointer to new MAC address @@ -2229,7 +2210,6 @@ static const struct net_device_ops spider_net_ops = { .ndo_start_xmit = spider_net_xmit, .ndo_set_rx_mode = spider_net_set_multi, .ndo_set_mac_address = spider_net_set_mac, - .ndo_change_mtu = spider_net_change_mtu, .ndo_do_ioctl = spider_net_do_ioctl, .ndo_tx_timeout = spider_net_tx_timeout, .ndo_validate_addr = eth_validate_addr, @@ -2299,6 +2279,10 @@ spider_net_setup_netdev(struct spider_net_card *card) /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | * NETIF_F_HW_VLAN_CTAG_FILTER */ + /* MTU range: 64 - 2294 */ + netdev->min_mtu = SPIDER_NET_MIN_MTU; + netdev->max_mtu = SPIDER_NET_MAX_MTU; + netdev->irq = card->pdev->irq; card->num_rx_ints = 0; card->ignore_rx_ramfull = 0; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 5b01b3fa9fec..3be61ed28741 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -747,7 +747,6 @@ static const struct net_device_ops tc35815_netdev_ops = { .ndo_tx_timeout = tc35815_tx_timeout, .ndo_do_ioctl = tc35815_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tc35815_poll_controller, diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 8fd131207ee1..f153ad729ce5 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1548,7 +1548,6 @@ static const struct net_device_ops tsi108_netdev_ops = { .ndo_do_ioctl = tsi108_do_ioctl, .ndo_set_mac_address = tsi108_set_mac, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 9d14731cdcb1..ba5c54249055 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -890,7 +890,6 @@ static const struct net_device_ops rhine_netdev_ops = { .ndo_start_xmit = rhine_start_tx, .ndo_get_stats64 = rhine_get_stats64, .ndo_set_rx_mode = rhine_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_do_ioctl = netdev_ioctl, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 908e72e18ef7..4716e60e2ccb 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2284,13 +2284,6 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) struct velocity_info *vptr = netdev_priv(dev); int ret = 0; - if ((new_mtu < VELOCITY_MIN_MTU) || new_mtu > (VELOCITY_MAX_MTU)) { - VELOCITY_PRT(MSG_LEVEL_ERR, KERN_NOTICE "%s: Invalid MTU.\n", - vptr->netdev->name); - ret = -EINVAL; - goto out_0; - } - if (!netif_running(dev)) { dev->mtu = new_mtu; goto out_0; @@ -2864,6 +2857,10 @@ static int velocity_probe(struct device *dev, int irq, NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_IP_CSUM; + /* MTU range: 64 - 9000 */ + netdev->min_mtu = VELOCITY_MIN_MTU; + netdev->max_mtu = VELOCITY_MAX_MTU; + ret = register_netdev(netdev); if (ret < 0) goto err_iounmap; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index d2349a1bc6ba..e1296ef2cf66 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1045,7 +1045,6 @@ static const struct net_device_ops w5100_netdev_ops = { .ndo_set_rx_mode = w5100_set_rx_mode, .ndo_set_mac_address = w5100_set_macaddr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int w5100_mmio_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index ca31a57dbc86..724fabd38a23 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -536,7 +536,6 @@ static const struct net_device_ops w5300_netdev_ops = { .ndo_set_rx_mode = w5300_set_rx_mode, .ndo_set_mac_address = w5300_set_macaddr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static int w5300_hw_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index c688d68c39aa..c9c8a3be9f1b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1034,9 +1034,6 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu) XAE_TRL_SIZE) > lp->rxmem) return -EINVAL; - if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64)) - return -EINVAL; - ndev->mtu = new_mtu; return 0; @@ -1475,6 +1472,10 @@ static int axienet_probe(struct platform_device *pdev) ndev->netdev_ops = &axienet_netdev_ops; ndev->ethtool_ops = &axienet_ethtool_ops; + /* MTU range: 64 - 9000 */ + ndev->min_mtu = 64; + ndev->max_mtu = XAE_JUMBO_MTU; + lp = netdev_priv(ndev); lp->ndev = ndev; lp->dev = &pdev->dev; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index ddced28e8247..3b08ec766076 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -466,7 +466,6 @@ static const struct net_device_ops netdev_ops = { .ndo_set_config = do_config, .ndo_do_ioctl = do_ioctl, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index fa32391720fe..07d862d90869 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1378,7 +1378,6 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_start_xmit = eth_xmit, .ndo_set_rx_mode = eth_set_mcast_list, .ndo_do_ioctl = eth_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 51acc6d86e91..3a639180e4a0 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -166,7 +166,6 @@ static const struct net_device_ops skfp_netdev_ops = { .ndo_stop = skfp_close, .ndo_start_xmit = skfp_send_pkt, .ndo_get_stats = skfp_ctl_get_stats, - .ndo_change_mtu = fddi_change_mtu, .ndo_set_rx_mode = skfp_ctl_set_multicast_list, .ndo_set_mac_address = skfp_ctl_set_mac_address, .ndo_do_ioctl = skfp_ioctl, diff --git a/drivers/net/fjes/Makefile b/drivers/net/fjes/Makefile index 523e3d7cf7aa..bc47b354c104 100644 --- a/drivers/net/fjes/Makefile +++ b/drivers/net/fjes/Makefile @@ -27,4 +27,4 @@ obj-$(CONFIG_FUJITSU_ES) += fjes.o -fjes-objs := fjes_main.o fjes_hw.o fjes_ethtool.o +fjes-objs := fjes_main.o fjes_hw.o fjes_ethtool.o fjes_trace.o fjes_debugfs.o diff --git a/drivers/net/fjes/fjes.h b/drivers/net/fjes/fjes.h index a592fe21c698..0372be3ad8e3 100644 --- a/drivers/net/fjes/fjes.h +++ b/drivers/net/fjes/fjes.h @@ -66,6 +66,10 @@ struct fjes_adapter { bool interrupt_watch_enable; struct fjes_hw hw; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbg_adapter; +#endif }; extern char fjes_driver_name[]; @@ -74,4 +78,16 @@ extern const u32 fjes_support_mtu[]; void fjes_set_ethtool_ops(struct net_device *); +#ifdef CONFIG_DEBUG_FS +void fjes_dbg_adapter_init(struct fjes_adapter *adapter); +void fjes_dbg_adapter_exit(struct fjes_adapter *adapter); +void fjes_dbg_init(void); +void fjes_dbg_exit(void); +#else +static inline void fjes_dbg_adapter_init(struct fjes_adapter *adapter) {} +static inline void fjes_dbg_adapter_exit(struct fjes_adapter *adapter) {} +static inline void fjes_dbg_init(void) {} +static inline void fjes_dbg_exit(void) {} +#endif /* CONFIG_DEBUG_FS */ + #endif /* FJES_H_ */ diff --git a/drivers/net/fjes/fjes_debugfs.c b/drivers/net/fjes/fjes_debugfs.c new file mode 100644 index 000000000000..30052ebd52bf --- /dev/null +++ b/drivers/net/fjes/fjes_debugfs.c @@ -0,0 +1,117 @@ +/* + * FUJITSU Extended Socket Network Device driver + * Copyright (c) 2015-2016 FUJITSU LIMITED + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* debugfs support for fjes driver */ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/platform_device.h> + +#include "fjes.h" + +static struct dentry *fjes_debug_root; + +static const char * const ep_status_string[] = { + "unshared", + "shared", + "waiting", + "complete", +}; + +static int fjes_dbg_status_show(struct seq_file *m, void *v) +{ + struct fjes_adapter *adapter = m->private; + struct fjes_hw *hw = &adapter->hw; + int max_epid = hw->max_epid; + int my_epid = hw->my_epid; + int epidx; + + seq_puts(m, "EPID\tSTATUS SAME_ZONE CONNECTED\n"); + for (epidx = 0; epidx < max_epid; epidx++) { + if (epidx == my_epid) { + seq_printf(m, "ep%d\t%-16c %-16c %-16c\n", + epidx, '-', '-', '-'); + } else { + seq_printf(m, "ep%d\t%-16s %-16c %-16c\n", + epidx, + ep_status_string[fjes_hw_get_partner_ep_status(hw, epidx)], + fjes_hw_epid_is_same_zone(hw, epidx) ? 'Y' : 'N', + fjes_hw_epid_is_shared(hw->hw_info.share, epidx) ? 'Y' : 'N'); + } + } + + return 0; +} + +static int fjes_dbg_status_open(struct inode *inode, struct file *file) +{ + return single_open(file, fjes_dbg_status_show, inode->i_private); +} + +static const struct file_operations fjes_dbg_status_fops = { + .owner = THIS_MODULE, + .open = fjes_dbg_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void fjes_dbg_adapter_init(struct fjes_adapter *adapter) +{ + const char *name = dev_name(&adapter->plat_dev->dev); + struct dentry *pfile; + + adapter->dbg_adapter = debugfs_create_dir(name, fjes_debug_root); + if (!adapter->dbg_adapter) { + dev_err(&adapter->plat_dev->dev, + "debugfs entry for %s failed\n", name); + return; + } + + pfile = debugfs_create_file("status", 0444, adapter->dbg_adapter, + adapter, &fjes_dbg_status_fops); + if (!pfile) + dev_err(&adapter->plat_dev->dev, + "debugfs status for %s failed\n", name); +} + +void fjes_dbg_adapter_exit(struct fjes_adapter *adapter) +{ + debugfs_remove_recursive(adapter->dbg_adapter); + adapter->dbg_adapter = NULL; +} + +void fjes_dbg_init(void) +{ + fjes_debug_root = debugfs_create_dir(fjes_driver_name, NULL); + if (!fjes_debug_root) + pr_info("init of debugfs failed\n"); +} + +void fjes_dbg_exit(void) +{ + debugfs_remove_recursive(fjes_debug_root); + fjes_debug_root = NULL; +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/fjes/fjes_ethtool.c b/drivers/net/fjes/fjes_ethtool.c index 9c218e140c41..6575f880f1be 100644 --- a/drivers/net/fjes/fjes_ethtool.c +++ b/drivers/net/fjes/fjes_ethtool.c @@ -49,10 +49,18 @@ static const struct fjes_stats fjes_gstrings_stats[] = { FJES_STAT("tx_dropped", stats64.tx_dropped), }; +#define FJES_EP_STATS_LEN 14 +#define FJES_STATS_LEN \ + (ARRAY_SIZE(fjes_gstrings_stats) + \ + ((&((struct fjes_adapter *)netdev_priv(netdev))->hw)->max_epid - 1) * \ + FJES_EP_STATS_LEN) + static void fjes_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; + int epidx; char *p; int i; @@ -61,11 +69,39 @@ static void fjes_get_ethtool_stats(struct net_device *netdev, data[i] = (fjes_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } + for (epidx = 0; epidx < hw->max_epid; epidx++) { + if (epidx == hw->my_epid) + continue; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .com_regist_buf_exec; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .com_unregist_buf_exec; + data[i++] = hw->ep_shm_info[epidx].ep_stats.send_intr_rx; + data[i++] = hw->ep_shm_info[epidx].ep_stats.send_intr_unshare; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .send_intr_zoneupdate; + data[i++] = hw->ep_shm_info[epidx].ep_stats.recv_intr_rx; + data[i++] = hw->ep_shm_info[epidx].ep_stats.recv_intr_unshare; + data[i++] = hw->ep_shm_info[epidx].ep_stats.recv_intr_stop; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .recv_intr_zoneupdate; + data[i++] = hw->ep_shm_info[epidx].ep_stats.tx_buffer_full; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .tx_dropped_not_shared; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .tx_dropped_ver_mismatch; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .tx_dropped_buf_size_mismatch; + data[i++] = hw->ep_shm_info[epidx].ep_stats + .tx_dropped_vlanid_mismatch; + } } static void fjes_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; u8 *p = data; int i; @@ -76,6 +112,38 @@ static void fjes_get_strings(struct net_device *netdev, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + for (i = 0; i < hw->max_epid; i++) { + if (i == hw->my_epid) + continue; + sprintf(p, "ep%u_com_regist_buf_exec", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_com_unregist_buf_exec", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_send_intr_rx", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_send_intr_unshare", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_send_intr_zoneupdate", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_recv_intr_rx", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_recv_intr_unshare", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_recv_intr_stop", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_recv_intr_zoneupdate", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_tx_buffer_full", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_tx_dropped_not_shared", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_tx_dropped_ver_mismatch", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_tx_dropped_buf_size_mismatch", i); + p += ETH_GSTRING_LEN; + sprintf(p, "ep%u_tx_dropped_vlanid_mismatch", i); + p += ETH_GSTRING_LEN; + } break; } } @@ -84,7 +152,7 @@ static int fjes_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_STATS: - return ARRAY_SIZE(fjes_gstrings_stats); + return FJES_STATS_LEN; default: return -EOPNOTSUPP; } @@ -121,12 +189,123 @@ static int fjes_get_settings(struct net_device *netdev, return 0; } +static int fjes_get_regs_len(struct net_device *netdev) +{ +#define FJES_REGS_LEN 37 + return FJES_REGS_LEN * sizeof(u32); +} + +static void fjes_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; + u32 *regs_buff = p; + + memset(p, 0, FJES_REGS_LEN * sizeof(u32)); + + regs->version = 1; + + /* Information registers */ + regs_buff[0] = rd32(XSCT_OWNER_EPID); + regs_buff[1] = rd32(XSCT_MAX_EP); + + /* Device Control registers */ + regs_buff[4] = rd32(XSCT_DCTL); + + /* Command Control registers */ + regs_buff[8] = rd32(XSCT_CR); + regs_buff[9] = rd32(XSCT_CS); + regs_buff[10] = rd32(XSCT_SHSTSAL); + regs_buff[11] = rd32(XSCT_SHSTSAH); + + regs_buff[13] = rd32(XSCT_REQBL); + regs_buff[14] = rd32(XSCT_REQBAL); + regs_buff[15] = rd32(XSCT_REQBAH); + + regs_buff[17] = rd32(XSCT_RESPBL); + regs_buff[18] = rd32(XSCT_RESPBAL); + regs_buff[19] = rd32(XSCT_RESPBAH); + + /* Interrupt Control registers */ + regs_buff[32] = rd32(XSCT_IS); + regs_buff[33] = rd32(XSCT_IMS); + regs_buff[34] = rd32(XSCT_IMC); + regs_buff[35] = rd32(XSCT_IG); + regs_buff[36] = rd32(XSCT_ICTL); +} + +static int fjes_set_dump(struct net_device *netdev, struct ethtool_dump *dump) +{ + struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; + int ret = 0; + + if (dump->flag) { + if (hw->debug_mode) + return -EPERM; + + hw->debug_mode = dump->flag; + + /* enable debug mode */ + mutex_lock(&hw->hw_info.lock); + ret = fjes_hw_start_debug(hw); + mutex_unlock(&hw->hw_info.lock); + + if (ret) + hw->debug_mode = 0; + } else { + if (!hw->debug_mode) + return -EPERM; + + /* disable debug mode */ + mutex_lock(&hw->hw_info.lock); + ret = fjes_hw_stop_debug(hw); + mutex_unlock(&hw->hw_info.lock); + } + + return ret; +} + +static int fjes_get_dump_flag(struct net_device *netdev, + struct ethtool_dump *dump) +{ + struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; + + dump->len = hw->hw_info.trace_size; + dump->version = 1; + dump->flag = hw->debug_mode; + + return 0; +} + +static int fjes_get_dump_data(struct net_device *netdev, + struct ethtool_dump *dump, void *buf) +{ + struct fjes_adapter *adapter = netdev_priv(netdev); + struct fjes_hw *hw = &adapter->hw; + int ret = 0; + + if (hw->hw_info.trace) + memcpy(buf, hw->hw_info.trace, hw->hw_info.trace_size); + else + ret = -EPERM; + + return ret; +} + static const struct ethtool_ops fjes_ethtool_ops = { .get_settings = fjes_get_settings, .get_drvinfo = fjes_get_drvinfo, .get_ethtool_stats = fjes_get_ethtool_stats, .get_strings = fjes_get_strings, .get_sset_count = fjes_get_sset_count, + .get_regs = fjes_get_regs, + .get_regs_len = fjes_get_regs_len, + .set_dump = fjes_set_dump, + .get_dump_flag = fjes_get_dump_flag, + .get_dump_data = fjes_get_dump_data, }; void fjes_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 0dbafedc0a34..9c652c04375b 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -21,6 +21,7 @@ #include "fjes_hw.h" #include "fjes.h" +#include "fjes_trace.h" static void fjes_hw_update_zone_task(struct work_struct *); static void fjes_hw_epstop_task(struct work_struct *); @@ -342,6 +343,9 @@ int fjes_hw_init(struct fjes_hw *hw) ret = fjes_hw_setup(hw); + hw->hw_info.trace = vzalloc(FJES_DEBUG_BUFFER_SIZE); + hw->hw_info.trace_size = FJES_DEBUG_BUFFER_SIZE; + return ret; } @@ -350,6 +354,18 @@ void fjes_hw_exit(struct fjes_hw *hw) int ret; if (hw->base) { + + if (hw->debug_mode) { + /* disable debug mode */ + mutex_lock(&hw->hw_info.lock); + fjes_hw_stop_debug(hw); + mutex_unlock(&hw->hw_info.lock); + } + vfree(hw->hw_info.trace); + hw->hw_info.trace = NULL; + hw->hw_info.trace_size = 0; + hw->debug_mode = 0; + ret = fjes_hw_reset(hw); if (ret) pr_err("%s: reset error", __func__); @@ -371,7 +387,7 @@ fjes_hw_issue_request_command(struct fjes_hw *hw, enum fjes_dev_command_response_e ret = FJES_CMD_STATUS_UNKNOWN; union REG_CR cr; union REG_CS cs; - int timeout; + int timeout = FJES_COMMAND_REQ_TIMEOUT * 1000; cr.reg = 0; cr.bits.req_start = 1; @@ -408,6 +424,8 @@ fjes_hw_issue_request_command(struct fjes_hw *hw, } } + trace_fjes_hw_issue_request_command(&cr, &cs, timeout, ret); + return ret; } @@ -427,11 +445,13 @@ int fjes_hw_request_info(struct fjes_hw *hw) res_buf->info.code = 0; ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_INFO); + trace_fjes_hw_request_info(hw, res_buf); result = 0; if (FJES_DEV_COMMAND_INFO_RES_LEN((*hw->hw_info.max_epid)) != res_buf->info.length) { + trace_fjes_hw_request_info_err("Invalid res_buf"); result = -ENOMSG; } else if (ret == FJES_CMD_STATUS_NORMAL) { switch (res_buf->info.code) { @@ -448,6 +468,7 @@ int fjes_hw_request_info(struct fjes_hw *hw) result = -EPERM; break; case FJES_CMD_STATUS_TIMEOUT: + trace_fjes_hw_request_info_err("Timeout"); result = -EBUSY; break; case FJES_CMD_STATUS_ERROR_PARAM: @@ -512,6 +533,8 @@ int fjes_hw_register_buff_addr(struct fjes_hw *hw, int dest_epid, res_buf->share_buffer.length = 0; res_buf->share_buffer.code = 0; + trace_fjes_hw_register_buff_addr_req(req_buf, buf_pair); + ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_SHARE_BUFFER); timeout = FJES_COMMAND_REQ_BUFF_TIMEOUT * 1000; @@ -532,16 +555,20 @@ int fjes_hw_register_buff_addr(struct fjes_hw *hw, int dest_epid, result = 0; + trace_fjes_hw_register_buff_addr(res_buf, timeout); + if (res_buf->share_buffer.length != - FJES_DEV_COMMAND_SHARE_BUFFER_RES_LEN) + FJES_DEV_COMMAND_SHARE_BUFFER_RES_LEN) { + trace_fjes_hw_register_buff_addr_err("Invalid res_buf"); result = -ENOMSG; - else if (ret == FJES_CMD_STATUS_NORMAL) { + } else if (ret == FJES_CMD_STATUS_NORMAL) { switch (res_buf->share_buffer.code) { case FJES_CMD_REQ_RES_CODE_NORMAL: result = 0; set_bit(dest_epid, &hw->hw_info.buffer_share_bit); break; case FJES_CMD_REQ_RES_CODE_BUSY: + trace_fjes_hw_register_buff_addr_err("Busy Timeout"); result = -EBUSY; break; default: @@ -554,6 +581,7 @@ int fjes_hw_register_buff_addr(struct fjes_hw *hw, int dest_epid, result = -EPERM; break; case FJES_CMD_STATUS_TIMEOUT: + trace_fjes_hw_register_buff_addr_err("Timeout"); result = -EBUSY; break; case FJES_CMD_STATUS_ERROR_PARAM: @@ -595,6 +623,7 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid) res_buf->unshare_buffer.length = 0; res_buf->unshare_buffer.code = 0; + trace_fjes_hw_unregister_buff_addr_req(req_buf); ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_UNSHARE_BUFFER); timeout = FJES_COMMAND_REQ_BUFF_TIMEOUT * 1000; @@ -616,8 +645,11 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid) result = 0; + trace_fjes_hw_unregister_buff_addr(res_buf, timeout); + if (res_buf->unshare_buffer.length != FJES_DEV_COMMAND_UNSHARE_BUFFER_RES_LEN) { + trace_fjes_hw_unregister_buff_addr_err("Invalid res_buf"); result = -ENOMSG; } else if (ret == FJES_CMD_STATUS_NORMAL) { switch (res_buf->unshare_buffer.code) { @@ -626,6 +658,7 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid) clear_bit(dest_epid, &hw->hw_info.buffer_share_bit); break; case FJES_CMD_REQ_RES_CODE_BUSY: + trace_fjes_hw_unregister_buff_addr_err("Busy Timeout"); result = -EBUSY; break; default: @@ -638,6 +671,7 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid) result = -EPERM; break; case FJES_CMD_STATUS_TIMEOUT: + trace_fjes_hw_unregister_buff_addr_err("Timeout"); result = -EBUSY; break; case FJES_CMD_STATUS_ERROR_PARAM: @@ -752,6 +786,7 @@ void fjes_hw_raise_epstop(struct fjes_hw *hw) case EP_PARTNER_SHARED: fjes_hw_raise_interrupt(hw, epidx, REG_ICTL_MASK_TXRX_STOP_REQ); + hw->ep_shm_info[epidx].ep_stats.send_intr_unshare += 1; break; default: break; @@ -1062,6 +1097,9 @@ static void fjes_hw_update_zone_task(struct work_struct *work) break; } mutex_unlock(&hw->hw_info.lock); + + hw->ep_shm_info[epidx].ep_stats + .com_regist_buf_exec += 1; } if (test_bit(epidx, &unshare_bit)) { @@ -1085,6 +1123,9 @@ static void fjes_hw_update_zone_task(struct work_struct *work) mutex_unlock(&hw->hw_info.lock); + hw->ep_shm_info[epidx].ep_stats + .com_unregist_buf_exec += 1; + if (ret == 0) { spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf( @@ -1099,6 +1140,8 @@ static void fjes_hw_update_zone_task(struct work_struct *work) fjes_hw_raise_interrupt(hw, epidx, REG_ICTL_MASK_TXRX_STOP_REQ); + hw->ep_shm_info[epidx].ep_stats.send_intr_unshare += 1; + set_bit(epidx, &hw->txrx_stop_req_bit); spin_lock_irqsave(&hw->rx_status_lock, flags); hw->ep_shm_info[epidx].tx. @@ -1147,3 +1190,125 @@ static void fjes_hw_epstop_task(struct work_struct *work) } } } + +int fjes_hw_start_debug(struct fjes_hw *hw) +{ + union fjes_device_command_req *req_buf = hw->hw_info.req_buf; + union fjes_device_command_res *res_buf = hw->hw_info.res_buf; + enum fjes_dev_command_response_e ret; + int page_count; + int result = 0; + void *addr; + int i; + + if (!hw->hw_info.trace) + return -EPERM; + memset(hw->hw_info.trace, 0, FJES_DEBUG_BUFFER_SIZE); + + memset(req_buf, 0, hw->hw_info.req_buf_size); + memset(res_buf, 0, hw->hw_info.res_buf_size); + + req_buf->start_trace.length = + FJES_DEV_COMMAND_START_DBG_REQ_LEN(hw->hw_info.trace_size); + req_buf->start_trace.mode = hw->debug_mode; + req_buf->start_trace.buffer_len = hw->hw_info.trace_size; + page_count = hw->hw_info.trace_size / FJES_DEBUG_PAGE_SIZE; + for (i = 0; i < page_count; i++) { + addr = ((u8 *)hw->hw_info.trace) + i * FJES_DEBUG_PAGE_SIZE; + req_buf->start_trace.buffer[i] = + (__le64)(page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr)); + } + + res_buf->start_trace.length = 0; + res_buf->start_trace.code = 0; + + trace_fjes_hw_start_debug_req(req_buf); + ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_START_DEBUG); + trace_fjes_hw_start_debug(res_buf); + + if (res_buf->start_trace.length != + FJES_DEV_COMMAND_START_DBG_RES_LEN) { + result = -ENOMSG; + trace_fjes_hw_start_debug_err("Invalid res_buf"); + } else if (ret == FJES_CMD_STATUS_NORMAL) { + switch (res_buf->start_trace.code) { + case FJES_CMD_REQ_RES_CODE_NORMAL: + result = 0; + break; + default: + result = -EPERM; + break; + } + } else { + switch (ret) { + case FJES_CMD_STATUS_UNKNOWN: + result = -EPERM; + break; + case FJES_CMD_STATUS_TIMEOUT: + trace_fjes_hw_start_debug_err("Busy Timeout"); + result = -EBUSY; + break; + case FJES_CMD_STATUS_ERROR_PARAM: + case FJES_CMD_STATUS_ERROR_STATUS: + default: + result = -EPERM; + break; + } + } + + return result; +} + +int fjes_hw_stop_debug(struct fjes_hw *hw) +{ + union fjes_device_command_req *req_buf = hw->hw_info.req_buf; + union fjes_device_command_res *res_buf = hw->hw_info.res_buf; + enum fjes_dev_command_response_e ret; + int result = 0; + + if (!hw->hw_info.trace) + return -EPERM; + + memset(req_buf, 0, hw->hw_info.req_buf_size); + memset(res_buf, 0, hw->hw_info.res_buf_size); + req_buf->stop_trace.length = FJES_DEV_COMMAND_STOP_DBG_REQ_LEN; + + res_buf->stop_trace.length = 0; + res_buf->stop_trace.code = 0; + + ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_STOP_DEBUG); + trace_fjes_hw_stop_debug(res_buf); + + if (res_buf->stop_trace.length != FJES_DEV_COMMAND_STOP_DBG_RES_LEN) { + trace_fjes_hw_stop_debug_err("Invalid res_buf"); + result = -ENOMSG; + } else if (ret == FJES_CMD_STATUS_NORMAL) { + switch (res_buf->stop_trace.code) { + case FJES_CMD_REQ_RES_CODE_NORMAL: + result = 0; + hw->debug_mode = 0; + break; + default: + result = -EPERM; + break; + } + } else { + switch (ret) { + case FJES_CMD_STATUS_UNKNOWN: + result = -EPERM; + break; + case FJES_CMD_STATUS_TIMEOUT: + result = -EBUSY; + trace_fjes_hw_stop_debug_err("Busy Timeout"); + break; + case FJES_CMD_STATUS_ERROR_PARAM: + case FJES_CMD_STATUS_ERROR_STATUS: + default: + result = -EPERM; + break; + } + } + + return result; +} diff --git a/drivers/net/fjes/fjes_hw.h b/drivers/net/fjes/fjes_hw.h index 1445ac99d6e3..3a6da0996a0e 100644 --- a/drivers/net/fjes/fjes_hw.h +++ b/drivers/net/fjes/fjes_hw.h @@ -33,6 +33,9 @@ struct fjes_hw; #define EP_BUFFER_SUPPORT_VLAN_MAX 4 #define EP_BUFFER_INFO_SIZE 4096 +#define FJES_DEBUG_PAGE_SIZE 4096 +#define FJES_DEBUG_BUFFER_SIZE (16 * FJES_DEBUG_PAGE_SIZE) + #define FJES_DEVICE_RESET_TIMEOUT ((17 + 1) * 3 * 8) /* sec */ #define FJES_COMMAND_REQ_TIMEOUT ((5 + 1) * 3 * 8) /* sec */ #define FJES_COMMAND_REQ_BUFF_TIMEOUT (60 * 3) /* sec */ @@ -94,6 +97,12 @@ struct fjes_hw; #define FJES_DEV_RES_BUF_SIZE(maxep) \ FJES_DEV_COMMAND_INFO_RES_LEN(maxep) +#define FJES_DEV_COMMAND_START_DBG_REQ_LEN(byte) \ + (16 + (8 * (byte) / FJES_DEBUG_PAGE_SIZE)) +#define FJES_DEV_COMMAND_START_DBG_RES_LEN (8) +#define FJES_DEV_COMMAND_STOP_DBG_REQ_LEN (4) +#define FJES_DEV_COMMAND_STOP_DBG_RES_LEN (8) + /* Frame & MTU */ struct esmem_frame { __le32 frame_size; @@ -173,6 +182,8 @@ enum fjes_dev_command_request_type { FJES_CMD_REQ_INFO = 0x0001, FJES_CMD_REQ_SHARE_BUFFER = 0x0002, FJES_CMD_REQ_UNSHARE_BUFFER = 0x0004, + FJES_CMD_REQ_START_DEBUG = 0x0100, + FJES_CMD_REQ_STOP_DEBUG = 0x0200, }; /* parameter for command control */ @@ -228,6 +239,24 @@ union ep_buffer_info { }; +/* statistics of EP */ +struct fjes_drv_ep_stats { + u64 com_regist_buf_exec; + u64 com_unregist_buf_exec; + u64 send_intr_rx; + u64 send_intr_unshare; + u64 send_intr_zoneupdate; + u64 recv_intr_rx; + u64 recv_intr_unshare; + u64 recv_intr_stop; + u64 recv_intr_zoneupdate; + u64 tx_buffer_full; + u64 tx_dropped_not_shared; + u64 tx_dropped_ver_mismatch; + u64 tx_dropped_buf_size_mismatch; + u64 tx_dropped_vlanid_mismatch; +}; + /* buffer pair for Extended Partition */ struct ep_share_mem_info { struct epbuf_handler { @@ -238,6 +267,7 @@ struct ep_share_mem_info { } tx, rx; struct rtnl_link_stats64 net_stats; + struct fjes_drv_ep_stats ep_stats; u16 tx_status_work; @@ -302,6 +332,8 @@ struct fjes_hw { struct fjes_hw_info hw_info; spinlock_t rx_status_lock; /* spinlock for rx_status */ + + u32 debug_mode; }; int fjes_hw_init(struct fjes_hw *); @@ -334,4 +366,6 @@ void *fjes_hw_epbuf_rx_curpkt_get_addr(struct epbuf_handler *, size_t *); void fjes_hw_epbuf_rx_curpkt_drop(struct epbuf_handler *); int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *, void *, size_t); +int fjes_hw_start_debug(struct fjes_hw *); +int fjes_hw_stop_debug(struct fjes_hw *); #endif /* FJES_HW_H_ */ diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index e46b1ebbbff4..b77e4ecf3cf2 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -27,9 +27,10 @@ #include <linux/interrupt.h> #include "fjes.h" +#include "fjes_trace.h" #define MAJ 1 -#define MIN 1 +#define MIN 2 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) #define DRV_NAME "fjes" char fjes_driver_name[] = DRV_NAME; @@ -366,6 +367,8 @@ static int fjes_setup_resources(struct fjes_adapter *adapter) FJES_ZONING_STATUS_ENABLE)) { fjes_hw_raise_interrupt(hw, epidx, REG_ICTL_MASK_INFO_UPDATE); + hw->ep_shm_info[epidx].ep_stats + .send_intr_zoneupdate += 1; } } @@ -397,6 +400,9 @@ static int fjes_setup_resources(struct fjes_adapter *adapter) adapter->force_reset = true; return result; } + + hw->ep_shm_info[epidx].ep_stats + .com_regist_buf_exec += 1; } } @@ -422,6 +428,8 @@ static void fjes_free_resources(struct fjes_adapter *adapter) result = fjes_hw_unregister_buff_addr(hw, epidx); mutex_unlock(&hw->hw_info.lock); + hw->ep_shm_info[epidx].ep_stats.com_unregist_buf_exec += 1; + if (result) reset_flag = true; @@ -567,6 +575,7 @@ static void fjes_raise_intr_rxdata_task(struct work_struct *work) FJES_RX_POLL_WORK)) { fjes_hw_raise_interrupt(hw, epid, REG_ICTL_MASK_RX_DATA); + hw->ep_shm_info[epid].ep_stats.send_intr_rx += 1; } } @@ -663,6 +672,9 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) pstatus = fjes_hw_get_partner_ep_status(hw, dest_epid); if (pstatus != EP_PARTNER_SHARED) { + if (!is_multi) + hw->ep_shm_info[dest_epid].ep_stats + .tx_dropped_not_shared += 1; ret = NETDEV_TX_OK; } else if (!fjes_hw_check_epbuf_version( &adapter->hw.ep_shm_info[dest_epid].rx, 0)) { @@ -670,6 +682,8 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) adapter->stats64.tx_carrier_errors += 1; hw->ep_shm_info[dest_epid].net_stats .tx_carrier_errors += 1; + hw->ep_shm_info[dest_epid].ep_stats + .tx_dropped_ver_mismatch += 1; ret = NETDEV_TX_OK; } else if (!fjes_hw_check_mtu( @@ -679,12 +693,16 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) hw->ep_shm_info[dest_epid].net_stats.tx_dropped += 1; adapter->stats64.tx_errors += 1; hw->ep_shm_info[dest_epid].net_stats.tx_errors += 1; + hw->ep_shm_info[dest_epid].ep_stats + .tx_dropped_buf_size_mismatch += 1; ret = NETDEV_TX_OK; } else if (vlan && !fjes_hw_check_vlan_id( &adapter->hw.ep_shm_info[dest_epid].rx, vlan_id)) { + hw->ep_shm_info[dest_epid].ep_stats + .tx_dropped_vlanid_mismatch += 1; ret = NETDEV_TX_OK; } else { if (len < VLAN_ETH_HLEN) { @@ -718,6 +736,8 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_OK; } else { netif_trans_update(netdev); + hw->ep_shm_info[dest_epid].ep_stats + .tx_buffer_full += 1; netif_tx_stop_queue(cur_queue); if (!work_pending(&adapter->tx_stall_task)) @@ -885,6 +905,7 @@ static void fjes_txrx_stop_req_irq(struct fjes_adapter *adapter, unsigned long flags; status = fjes_hw_get_partner_ep_status(hw, src_epid); + trace_fjes_txrx_stop_req_irq_pre(hw, src_epid, status); switch (status) { case EP_PARTNER_UNSHARE: case EP_PARTNER_COMPLETE: @@ -915,6 +936,7 @@ static void fjes_txrx_stop_req_irq(struct fjes_adapter *adapter, } break; } + trace_fjes_txrx_stop_req_irq_post(hw, src_epid); } static void fjes_stop_req_irq(struct fjes_adapter *adapter, int src_epid) @@ -926,6 +948,7 @@ static void fjes_stop_req_irq(struct fjes_adapter *adapter, int src_epid) set_bit(src_epid, &hw->hw_info.buffer_unshare_reserve_bit); status = fjes_hw_get_partner_ep_status(hw, src_epid); + trace_fjes_stop_req_irq_pre(hw, src_epid, status); switch (status) { case EP_PARTNER_WAITING: spin_lock_irqsave(&hw->rx_status_lock, flags); @@ -949,6 +972,7 @@ static void fjes_stop_req_irq(struct fjes_adapter *adapter, int src_epid) queue_work(adapter->control_wq, &hw->epstop_task); break; } + trace_fjes_stop_req_irq_post(hw, src_epid); } static void fjes_update_zone_irq(struct fjes_adapter *adapter, @@ -970,21 +994,33 @@ static irqreturn_t fjes_intr(int irq, void *data) icr = fjes_hw_capture_interrupt_status(hw); if (icr & REG_IS_MASK_IS_ASSERT) { - if (icr & REG_ICTL_MASK_RX_DATA) + if (icr & REG_ICTL_MASK_RX_DATA) { fjes_rx_irq(adapter, icr & REG_IS_MASK_EPID); + hw->ep_shm_info[icr & REG_IS_MASK_EPID].ep_stats + .recv_intr_rx += 1; + } - if (icr & REG_ICTL_MASK_DEV_STOP_REQ) + if (icr & REG_ICTL_MASK_DEV_STOP_REQ) { fjes_stop_req_irq(adapter, icr & REG_IS_MASK_EPID); + hw->ep_shm_info[icr & REG_IS_MASK_EPID].ep_stats + .recv_intr_stop += 1; + } - if (icr & REG_ICTL_MASK_TXRX_STOP_REQ) + if (icr & REG_ICTL_MASK_TXRX_STOP_REQ) { fjes_txrx_stop_req_irq(adapter, icr & REG_IS_MASK_EPID); + hw->ep_shm_info[icr & REG_IS_MASK_EPID].ep_stats + .recv_intr_unshare += 1; + } if (icr & REG_ICTL_MASK_TXRX_STOP_DONE) fjes_hw_set_irqmask(hw, REG_ICTL_MASK_TXRX_STOP_DONE, true); - if (icr & REG_ICTL_MASK_INFO_UPDATE) + if (icr & REG_ICTL_MASK_INFO_UPDATE) { fjes_update_zone_irq(adapter, icr & REG_IS_MASK_EPID); + hw->ep_shm_info[icr & REG_IS_MASK_EPID].ep_stats + .recv_intr_zoneupdate += 1; + } ret = IRQ_HANDLED; } else { @@ -1221,6 +1257,8 @@ static int fjes_probe(struct platform_device *plat_dev) netif_carrier_off(netdev); + fjes_dbg_adapter_init(adapter); + return 0; err_hw_exit: @@ -1238,6 +1276,8 @@ static int fjes_remove(struct platform_device *plat_dev) struct fjes_adapter *adapter = netdev_priv(netdev); struct fjes_hw *hw = &adapter->hw; + fjes_dbg_adapter_exit(adapter); + cancel_delayed_work_sync(&adapter->interrupt_watch_task); cancel_work_sync(&adapter->unshare_watch_task); cancel_work_sync(&adapter->raise_intr_rxdata_task); @@ -1276,6 +1316,8 @@ static void fjes_netdev_setup(struct net_device *netdev) netdev->netdev_ops = &fjes_netdev_ops; fjes_set_ethtool_ops(netdev); netdev->mtu = fjes_support_mtu[3]; + netdev->min_mtu = fjes_support_mtu[0]; + netdev->max_mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; } @@ -1364,6 +1406,8 @@ static void fjes_watch_unshare_task(struct work_struct *work) break; } mutex_unlock(&hw->hw_info.lock); + hw->ep_shm_info[epidx].ep_stats + .com_unregist_buf_exec += 1; spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&hw->ep_shm_info[epidx].tx, @@ -1406,6 +1450,9 @@ static void fjes_watch_unshare_task(struct work_struct *work) } mutex_unlock(&hw->hw_info.lock); + hw->ep_shm_info[epidx].ep_stats + .com_unregist_buf_exec += 1; + spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf( &hw->ep_shm_info[epidx].tx, @@ -1437,9 +1484,13 @@ static int __init fjes_init_module(void) pr_info("%s - version %s - %s\n", fjes_driver_string, fjes_driver_version, fjes_copyright); + fjes_dbg_init(); + result = platform_driver_register(&fjes_driver); - if (result < 0) + if (result < 0) { + fjes_dbg_exit(); return result; + } result = acpi_bus_register_driver(&fjes_acpi_driver); if (result < 0) @@ -1449,6 +1500,7 @@ static int __init fjes_init_module(void) fail_acpi_driver: platform_driver_unregister(&fjes_driver); + fjes_dbg_exit(); return result; } @@ -1459,6 +1511,7 @@ static void __exit fjes_exit_module(void) { acpi_bus_unregister_driver(&fjes_acpi_driver); platform_driver_unregister(&fjes_driver); + fjes_dbg_exit(); } module_exit(fjes_exit_module); diff --git a/drivers/net/fjes/fjes_trace.c b/drivers/net/fjes/fjes_trace.c new file mode 100644 index 000000000000..066fa765d5bb --- /dev/null +++ b/drivers/net/fjes/fjes_trace.c @@ -0,0 +1,30 @@ +/* + * FUJITSU Extended Socket Network Device driver + * Copyright (c) 2015-2016 FUJITSU LIMITED + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/module.h> + +#ifndef __CHECKER__ +#include "fjes_hw.h" + +#define CREATE_TRACE_POINTS +#include "fjes_trace.h" + +#endif /* __CHECKER__ */ diff --git a/drivers/net/fjes/fjes_trace.h b/drivers/net/fjes/fjes_trace.h new file mode 100644 index 000000000000..cca01a1b3d64 --- /dev/null +++ b/drivers/net/fjes/fjes_trace.h @@ -0,0 +1,380 @@ +/* + * FUJITSU Extended Socket Network Device driver + * Copyright (c) 2015-2016 FUJITSU LIMITED + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#if !defined(FJES_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define FJES_TRACE_H_ + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fjes + +/* tracepoints for fjes_hw.c */ + +TRACE_EVENT(fjes_hw_issue_request_command, + TP_PROTO(union REG_CR *cr, union REG_CS *cs, int timeout, + enum fjes_dev_command_response_e ret), + TP_ARGS(cr, cs, timeout, ret), + TP_STRUCT__entry( + __field(u16, cr_req) + __field(u8, cr_error) + __field(u16, cr_err_info) + __field(u8, cr_req_start) + __field(u16, cs_req) + __field(u8, cs_busy) + __field(u8, cs_complete) + __field(int, timeout) + __field(int, ret); + ), + TP_fast_assign( + __entry->cr_req = cr->bits.req_code; + __entry->cr_error = cr->bits.error; + __entry->cr_err_info = cr->bits.err_info; + __entry->cr_req_start = cr->bits.req_start; + __entry->cs_req = cs->bits.req_code; + __entry->cs_busy = cs->bits.busy; + __entry->cs_complete = cs->bits.complete; + __entry->timeout = timeout; + __entry->ret = ret; + ), + TP_printk("CR=[req=%04x, error=%u, err_info=%04x, req_start=%u], CS=[req=%04x, busy=%u, complete=%u], timeout=%d, ret=%d", + __entry->cr_req, __entry->cr_error, __entry->cr_err_info, + __entry->cr_req_start, __entry->cs_req, __entry->cs_busy, + __entry->cs_complete, __entry->timeout, __entry->ret) +); + +TRACE_EVENT(fjes_hw_request_info, + TP_PROTO(struct fjes_hw *hw, union fjes_device_command_res *res_buf), + TP_ARGS(hw, res_buf), + TP_STRUCT__entry( + __field(int, length) + __field(int, code) + __dynamic_array(u8, zone, hw->max_epid) + __dynamic_array(u8, status, hw->max_epid) + ), + TP_fast_assign( + int x; + + __entry->length = res_buf->info.length; + __entry->code = res_buf->info.code; + for (x = 0; x < hw->max_epid; x++) { + *((u8 *)__get_dynamic_array(zone) + x) = + res_buf->info.info[x].zone; + *((u8 *)__get_dynamic_array(status) + x) = + res_buf->info.info[x].es_status; + } + ), + TP_printk("res_buf=[length=%d, code=%d, es_zones=%s, es_status=%s]", + __entry->length, __entry->code, + __print_array(__get_dynamic_array(zone), + __get_dynamic_array_len(zone) / sizeof(u8), + sizeof(u8)), + __print_array(__get_dynamic_array(status), + __get_dynamic_array_len(status) / sizeof(u8), + sizeof(u8))) +); + +TRACE_EVENT(fjes_hw_request_info_err, + TP_PROTO(char *err), + TP_ARGS(err), + TP_STRUCT__entry( + __string(err, err) + ), + TP_fast_assign( + __assign_str(err, err); + ), + TP_printk("%s", __get_str(err)) +); + +TRACE_EVENT(fjes_hw_register_buff_addr_req, + TP_PROTO(union fjes_device_command_req *req_buf, + struct ep_share_mem_info *buf_pair), + TP_ARGS(req_buf, buf_pair), + TP_STRUCT__entry( + __field(int, length) + __field(int, epid) + __field(u64, tx) + __field(size_t, tx_size) + __field(u64, rx) + __field(size_t, rx_size) + ), + TP_fast_assign( + void *tx, *rx; + + tx = (void *)buf_pair->tx.buffer; + rx = (void *)buf_pair->rx.buffer; + __entry->length = req_buf->share_buffer.length; + __entry->epid = req_buf->share_buffer.epid; + __entry->tx_size = buf_pair->tx.size; + __entry->rx_size = buf_pair->rx.size; + __entry->tx = page_to_phys(vmalloc_to_page(tx)) + + offset_in_page(tx); + __entry->rx = page_to_phys(vmalloc_to_page(rx)) + + offset_in_page(rx); + ), + TP_printk("req_buf=[length=%d, epid=%d], TX=[phy=0x%016llx, size=%zu], RX=[phy=0x%016llx, size=%zu]", + __entry->length, __entry->epid, __entry->tx, __entry->tx_size, + __entry->rx, __entry->rx_size) +); + +TRACE_EVENT(fjes_hw_register_buff_addr, + TP_PROTO(union fjes_device_command_res *res_buf, int timeout), + TP_ARGS(res_buf, timeout), + TP_STRUCT__entry( + __field(int, length) + __field(int, code) + __field(int, timeout) + ), + TP_fast_assign( + __entry->length = res_buf->share_buffer.length; + __entry->code = res_buf->share_buffer.code; + __entry->timeout = timeout; + ), + TP_printk("res_buf=[length=%d, code=%d], timeout=%d", + __entry->length, __entry->code, __entry->timeout) +); + +TRACE_EVENT(fjes_hw_register_buff_addr_err, + TP_PROTO(char *err), + TP_ARGS(err), + TP_STRUCT__entry( + __string(err, err) + ), + TP_fast_assign( + __assign_str(err, err); + ), + TP_printk("%s", __get_str(err)) +); + +TRACE_EVENT(fjes_hw_unregister_buff_addr_req, + TP_PROTO(union fjes_device_command_req *req_buf), + TP_ARGS(req_buf), + TP_STRUCT__entry( + __field(int, length) + __field(int, epid) + ), + TP_fast_assign( + __entry->length = req_buf->unshare_buffer.length; + __entry->epid = req_buf->unshare_buffer.epid; + ), + TP_printk("req_buf=[length=%d, epid=%d]", + __entry->length, __entry->epid) +); + +TRACE_EVENT(fjes_hw_unregister_buff_addr, + TP_PROTO(union fjes_device_command_res *res_buf, int timeout), + TP_ARGS(res_buf, timeout), + TP_STRUCT__entry( + __field(int, length) + __field(int, code) + __field(int, timeout) + ), + TP_fast_assign( + __entry->length = res_buf->unshare_buffer.length; + __entry->code = res_buf->unshare_buffer.code; + __entry->timeout = timeout; + ), + TP_printk("res_buf=[length=%d, code=%d], timeout=%d", + __entry->length, __entry->code, __entry->timeout) +); + +TRACE_EVENT(fjes_hw_unregister_buff_addr_err, + TP_PROTO(char *err), + TP_ARGS(err), + TP_STRUCT__entry( + __string(err, err) + ), + TP_fast_assign( + __assign_str(err, err); + ), + TP_printk("%s", __get_str(err)) +); + +TRACE_EVENT(fjes_hw_start_debug_req, + TP_PROTO(union fjes_device_command_req *req_buf), + TP_ARGS(req_buf), + TP_STRUCT__entry( + __field(int, length) + __field(int, mode) + __field(phys_addr_t, buffer) + ), + TP_fast_assign( + __entry->length = req_buf->start_trace.length; + __entry->mode = req_buf->start_trace.mode; + __entry->buffer = req_buf->start_trace.buffer[0]; + ), + TP_printk("req_buf=[length=%d, mode=%d, buffer=%pap]", + __entry->length, __entry->mode, &__entry->buffer) +); + +TRACE_EVENT(fjes_hw_start_debug, + TP_PROTO(union fjes_device_command_res *res_buf), + TP_ARGS(res_buf), + TP_STRUCT__entry( + __field(int, length) + __field(int, code) + ), + TP_fast_assign( + __entry->length = res_buf->start_trace.length; + __entry->code = res_buf->start_trace.code; + ), + TP_printk("res_buf=[length=%d, code=%d]", __entry->length, __entry->code) +); + +TRACE_EVENT(fjes_hw_start_debug_err, + TP_PROTO(char *err), + TP_ARGS(err), + TP_STRUCT__entry( + __string(err, err) + ), + TP_fast_assign( + __assign_str(err, err) + ), + TP_printk("%s", __get_str(err)) +); + +TRACE_EVENT(fjes_hw_stop_debug, + TP_PROTO(union fjes_device_command_res *res_buf), + TP_ARGS(res_buf), + TP_STRUCT__entry( + __field(int, length) + __field(int, code) + ), + TP_fast_assign( + __entry->length = res_buf->stop_trace.length; + __entry->code = res_buf->stop_trace.code; + ), + TP_printk("res_buf=[length=%d, code=%d]", __entry->length, __entry->code) +); + +TRACE_EVENT(fjes_hw_stop_debug_err, + TP_PROTO(char *err), + TP_ARGS(err), + TP_STRUCT__entry( + __string(err, err) + ), + TP_fast_assign( + __assign_str(err, err) + ), + TP_printk("%s", __get_str(err)) +); + +/* tracepoints for fjes_main.c */ + +TRACE_EVENT(fjes_txrx_stop_req_irq_pre, + TP_PROTO(struct fjes_hw *hw, int src_epid, + enum ep_partner_status status), + TP_ARGS(hw, src_epid, status), + TP_STRUCT__entry( + __field(int, src_epid) + __field(enum ep_partner_status, status) + __field(u8, ep_status) + __field(unsigned long, txrx_stop_req_bit) + __field(u16, rx_status) + ), + TP_fast_assign( + __entry->src_epid = src_epid; + __entry->status = status; + __entry->ep_status = hw->hw_info.share->ep_status[src_epid]; + __entry->txrx_stop_req_bit = hw->txrx_stop_req_bit; + __entry->rx_status = + hw->ep_shm_info[src_epid].tx.info->v1i.rx_status; + ), + TP_printk("epid=%d, partner_status=%d, ep_status=%x, txrx_stop_req_bit=%016lx, tx.rx_status=%08x", + __entry->src_epid, __entry->status, __entry->ep_status, + __entry->txrx_stop_req_bit, __entry->rx_status) +); + +TRACE_EVENT(fjes_txrx_stop_req_irq_post, + TP_PROTO(struct fjes_hw *hw, int src_epid), + TP_ARGS(hw, src_epid), + TP_STRUCT__entry( + __field(int, src_epid) + __field(u8, ep_status) + __field(unsigned long, txrx_stop_req_bit) + __field(u16, rx_status) + ), + TP_fast_assign( + __entry->src_epid = src_epid; + __entry->ep_status = hw->hw_info.share->ep_status[src_epid]; + __entry->txrx_stop_req_bit = hw->txrx_stop_req_bit; + __entry->rx_status = hw->ep_shm_info[src_epid].tx.info->v1i.rx_status; + ), + TP_printk("epid=%d, ep_status=%x, txrx_stop_req_bit=%016lx, tx.rx_status=%08x", + __entry->src_epid, __entry->ep_status, + __entry->txrx_stop_req_bit, __entry->rx_status) +); + +TRACE_EVENT(fjes_stop_req_irq_pre, + TP_PROTO(struct fjes_hw *hw, int src_epid, + enum ep_partner_status status), + TP_ARGS(hw, src_epid, status), + TP_STRUCT__entry( + __field(int, src_epid) + __field(enum ep_partner_status, status) + __field(u8, ep_status) + __field(unsigned long, txrx_stop_req_bit) + __field(u16, rx_status) + ), + TP_fast_assign( + __entry->src_epid = src_epid; + __entry->status = status; + __entry->ep_status = hw->hw_info.share->ep_status[src_epid]; + __entry->txrx_stop_req_bit = hw->txrx_stop_req_bit; + __entry->rx_status = + hw->ep_shm_info[src_epid].tx.info->v1i.rx_status; + ), + TP_printk("epid=%d, partner_status=%d, ep_status=%x, txrx_stop_req_bit=%016lx, tx.rx_status=%08x", + __entry->src_epid, __entry->status, __entry->ep_status, + __entry->txrx_stop_req_bit, __entry->rx_status) +); + +TRACE_EVENT(fjes_stop_req_irq_post, + TP_PROTO(struct fjes_hw *hw, int src_epid), + TP_ARGS(hw, src_epid), + TP_STRUCT__entry( + __field(int, src_epid) + __field(u8, ep_status) + __field(unsigned long, txrx_stop_req_bit) + __field(u16, rx_status) + ), + TP_fast_assign( + __entry->src_epid = src_epid; + __entry->ep_status = hw->hw_info.share->ep_status[src_epid]; + __entry->txrx_stop_req_bit = hw->txrx_stop_req_bit; + __entry->rx_status = + hw->ep_shm_info[src_epid].tx.info->v1i.rx_status; + ), + TP_printk("epid=%d, ep_status=%x, txrx_stop_req_bit=%016lx, tx.rx_status=%08x", + __entry->src_epid, __entry->ep_status, + __entry->txrx_stop_req_bit, __entry->rx_status) +); + +#endif /* FJES_TRACE_H_ */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../../drivers/net/fjes +#define TRACE_INCLUDE_FILE fjes_trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 42edd7b7902f..85a423a66478 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1057,39 +1057,18 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) return geneve_xmit_skb(skb, dev, info); } -static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) +static int geneve_change_mtu(struct net_device *dev, int new_mtu) { - struct geneve_dev *geneve = netdev_priv(dev); - /* The max_mtu calculation does not take account of GENEVE - * options, to avoid excluding potentially valid - * configurations. + /* Only possible if called internally, ndo_change_mtu path's new_mtu + * is guaranteed to be between dev->min_mtu and dev->max_mtu. */ - int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; - - if (geneve->remote.sa.sa_family == AF_INET6) - max_mtu -= sizeof(struct ipv6hdr); - else - max_mtu -= sizeof(struct iphdr); - - if (new_mtu < 68) - return -EINVAL; - - if (new_mtu > max_mtu) { - if (strict) - return -EINVAL; - - new_mtu = max_mtu; - } + if (new_mtu > dev->max_mtu) + new_mtu = dev->max_mtu; dev->mtu = new_mtu; return 0; } -static int geneve_change_mtu(struct net_device *dev, int new_mtu) -{ - return __geneve_change_mtu(dev, new_mtu, true); -} - static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); @@ -1193,6 +1172,14 @@ static void geneve_setup(struct net_device *dev) dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* MTU range: 68 - (something less than 65535) */ + dev->min_mtu = ETH_MIN_MTU; + /* The max_mtu calculation does not take account of GENEVE + * options, to avoid excluding potentially valid + * configurations. This will be further reduced by IPvX hdr size. + */ + dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; + netif_keep_dst(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; @@ -1308,10 +1295,13 @@ static int geneve_configure(struct net *net, struct net_device *dev, /* make enough headroom for basic scenario */ encap_len = GENEVE_BASE_HLEN + ETH_HLEN; - if (remote->sa.sa_family == AF_INET) + if (remote->sa.sa_family == AF_INET) { encap_len += sizeof(struct iphdr); - else + dev->max_mtu -= sizeof(struct iphdr); + } else { encap_len += sizeof(struct ipv6hdr); + dev->max_mtu -= sizeof(struct ipv6hdr); + } dev->needed_headroom = encap_len + ETH_HLEN; if (metadata) { @@ -1511,7 +1501,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. */ - err = __geneve_change_mtu(dev, IP_MAX_MTU, false); + err = geneve_change_mtu(dev, IP_MAX_MTU); if (err) goto err; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 97e0cbca0a08..719d19f35673 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1094,14 +1094,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_family gtp_genl_family = { - .id = GENL_ID_GENERATE, - .name = "gtp", - .version = 0, - .hdrsize = 0, - .maxattr = GTPA_MAX, - .netnsok = true, -}; +static struct genl_family gtp_genl_family; static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, u32 type, struct pdp_ctx *pctx) @@ -1297,6 +1290,17 @@ static const struct genl_ops gtp_genl_ops[] = { }, }; +static struct genl_family gtp_genl_family __ro_after_init = { + .name = "gtp", + .version = 0, + .hdrsize = 0, + .maxattr = GTPA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = gtp_genl_ops, + .n_ops = ARRAY_SIZE(gtp_genl_ops), +}; + static int __net_init gtp_net_init(struct net *net) { struct gtp_net *gn = net_generic(net, gtp_net_id); @@ -1336,7 +1340,7 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family_with_ops(>p_genl_family, gtp_genl_ops); + err = genl_register_family(>p_genl_family); if (err < 0) goto unreg_rtnl_link; diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 95c0b45a68fb..f5a9728b89f3 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -68,7 +68,6 @@ static const struct net_device_ops rr_netdev_ops = { .ndo_stop = rr_close, .ndo_do_ioctl = rr_ioctl, .ndo_start_xmit = rr_start_xmit, - .ndo_change_mtu = hippi_change_mtu, .ndo_set_mac_address = hippi_mac_addr, }; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index f4fbcb5aa24a..3958adade7eb 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -606,8 +606,8 @@ struct nvsp_message { } __packed; -#define NETVSC_MTU 65536 -#define NETVSC_MTU_MIN 68 +#define NETVSC_MTU 65535 +#define NETVSC_MTU_MIN ETH_MIN_MTU #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 720b5fa9e625..e2bfaac1801d 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -888,6 +888,13 @@ int netvsc_send(struct hv_device *device, if (!net_device) return -ENODEV; + /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get + * here before the negotiation with the host is finished and + * send_section_map may not be allocated yet. + */ + if (!net_device->send_section_map) + return -EAGAIN; + out_channel = net_device->chn_table[q_idx]; packet->send_buf_index = NETVSC_INVALID_INDEX; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f6382150b16a..9522763c8faf 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -871,19 +871,12 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = ndevctx->nvdev; struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; - int limit = ETH_DATA_LEN; u32 num_chn; int ret = 0; if (ndevctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; - if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) - limit = NETVSC_MTU - ETH_HLEN; - - if (mtu < NETVSC_MTU_MIN || mtu > limit) - return -EINVAL; - ret = netvsc_close(ndev); if (ret) goto out; @@ -1401,6 +1394,13 @@ static int netvsc_probe(struct hv_device *dev, netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); + /* MTU range: 68 - 1500 or 65521 */ + net->min_mtu = NETVSC_MTU_MIN; + if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) + net->max_mtu = NETVSC_MTU - ETH_HLEN; + else + net->max_mtu = ETH_DATA_LEN; + ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f442eb366863..ab90b22e778c 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -26,7 +26,7 @@ static struct nf_hook_ops ipvl_nfops[] __read_mostly = { }, }; -static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { +static const struct l3mdev_ops ipvl_l3mdev_ops = { .l3mdev_l3_rcv = ipvlan_l3_rcv, }; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index d2e61e002926..cc00eb0db5d2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1431,14 +1431,7 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa) macsec_txsa_put(tx_sa); } -static struct genl_family macsec_fam = { - .id = GENL_ID_GENERATE, - .name = MACSEC_GENL_NAME, - .hdrsize = 0, - .version = MACSEC_GENL_VERSION, - .maxattr = MACSEC_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family macsec_fam; static struct net_device *get_dev_from_nl(struct net *net, struct nlattr **attrs) @@ -2665,6 +2658,17 @@ static const struct genl_ops macsec_genl_ops[] = { }, }; +static struct genl_family macsec_fam __ro_after_init = { + .name = MACSEC_GENL_NAME, + .hdrsize = 0, + .version = MACSEC_GENL_VERSION, + .maxattr = MACSEC_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = macsec_genl_ops, + .n_ops = ARRAY_SIZE(macsec_genl_ops), +}; + static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -2980,6 +2984,8 @@ static void macsec_free_netdev(struct net_device *dev) static void macsec_setup(struct net_device *dev) { ether_setup(dev); + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; dev->destructor = macsec_free_netdev; @@ -3470,7 +3476,7 @@ static int __init macsec_init(void) if (err) goto notifier; - err = genl_register_family_with_ops(&macsec_fam, macsec_genl_ops); + err = genl_register_family(&macsec_fam); if (err) goto rtnl; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2d6f12a112f..13b7e0b9bd9b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -179,20 +179,20 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan, macvlan_hash_add(vlan); } -static int macvlan_addr_busy(const struct macvlan_port *port, - const unsigned char *addr) +static bool macvlan_addr_busy(const struct macvlan_port *port, + const unsigned char *addr) { /* Test to see if the specified multicast address is * currently in use by the underlying device or * another macvlan. */ if (ether_addr_equal_64bits(port->dev->dev_addr, addr)) - return 1; + return true; if (macvlan_hash_lookup(port, addr)) - return 1; + return true; - return 0; + return false; } @@ -777,7 +777,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) { struct macvlan_dev *vlan = netdev_priv(dev); - if (new_mtu < 68 || vlan->lowerdev->mtu < new_mtu) + if (vlan->lowerdev->mtu < new_mtu) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1085,6 +1085,8 @@ void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); dev->priv_flags |= IFF_UNICAST_FLT; @@ -1298,6 +1300,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, else if (dev->mtu > lowerdev->mtu) return -EINVAL; + /* MTU range: 68 - lowerdev->max_mtu */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = lowerdev->max_mtu; + if (!tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 993570b1e2ae..6d953c53eed6 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -135,6 +135,103 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) } /** + * mii_ethtool_get_link_ksettings - get settings that are specified in @cmd + * @mii: MII interface + * @cmd: requested ethtool_link_ksettings + * + * The @cmd parameter is expected to have been cleared before calling + * mii_ethtool_get_link_ksettings(). + * + * Returns 0 for success, negative on error. + */ +int mii_ethtool_get_link_ksettings(struct mii_if_info *mii, + struct ethtool_link_ksettings *cmd) +{ + struct net_device *dev = mii->dev; + u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0; + u32 nego, supported, advertising, lp_advertising; + + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); + if (mii->supports_gmii) + supported |= SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full; + + /* only supports twisted-pair */ + cmd->base.port = PORT_MII; + + /* this isn't fully supported at higher layers */ + cmd->base.phy_address = mii->phy_id; + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22; + + advertising = ADVERTISED_TP | ADVERTISED_MII; + + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmsr = mii->mdio_read(dev, mii->phy_id, MII_BMSR); + if (mii->supports_gmii) { + ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000); + stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000); + } + if (bmcr & BMCR_ANENABLE) { + advertising |= ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_ENABLE; + + advertising |= mii_get_an(mii, MII_ADVERTISE); + if (mii->supports_gmii) + advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000); + + if (bmsr & BMSR_ANEGCOMPLETE) { + lp_advertising = mii_get_an(mii, MII_LPA); + lp_advertising |= + mii_stat1000_to_ethtool_lpa_t(stat1000); + } else { + lp_advertising = 0; + } + + nego = advertising & lp_advertising; + + if (nego & (ADVERTISED_1000baseT_Full | + ADVERTISED_1000baseT_Half)) { + cmd->base.speed = SPEED_1000; + cmd->base.duplex = !!(nego & ADVERTISED_1000baseT_Full); + } else if (nego & (ADVERTISED_100baseT_Full | + ADVERTISED_100baseT_Half)) { + cmd->base.speed = SPEED_100; + cmd->base.duplex = !!(nego & ADVERTISED_100baseT_Full); + } else { + cmd->base.speed = SPEED_10; + cmd->base.duplex = !!(nego & ADVERTISED_10baseT_Full); + } + } else { + cmd->base.autoneg = AUTONEG_DISABLE; + + cmd->base.speed = ((bmcr & BMCR_SPEED1000 && + (bmcr & BMCR_SPEED100) == 0) ? + SPEED_1000 : + ((bmcr & BMCR_SPEED100) ? + SPEED_100 : SPEED_10)); + cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? + DUPLEX_FULL : DUPLEX_HALF; + + lp_advertising = 0; + } + + mii->full_duplex = cmd->base.duplex; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); + + /* ignore maxtxpkt, maxrxpkt for now */ + + return 0; +} + +/** * mii_ethtool_sset - set settings that are specified in @ecmd * @mii: MII interface * @ecmd: requested ethtool_cmd @@ -227,6 +324,104 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) } /** + * mii_ethtool_set_link_ksettings - set settings that are specified in @cmd + * @mii: MII interfaces + * @cmd: requested ethtool_link_ksettings + * + * Returns 0 for success, negative on error. + */ +int mii_ethtool_set_link_ksettings(struct mii_if_info *mii, + const struct ethtool_link_ksettings *cmd) +{ + struct net_device *dev = mii->dev; + u32 speed = cmd->base.speed; + + if (speed != SPEED_10 && + speed != SPEED_100 && + speed != SPEED_1000) + return -EINVAL; + if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL) + return -EINVAL; + if (cmd->base.port != PORT_MII) + return -EINVAL; + if (cmd->base.phy_address != mii->phy_id) + return -EINVAL; + if (cmd->base.autoneg != AUTONEG_DISABLE && + cmd->base.autoneg != AUTONEG_ENABLE) + return -EINVAL; + if ((speed == SPEED_1000) && (!mii->supports_gmii)) + return -EINVAL; + + /* ignore supported, maxtxpkt, maxrxpkt */ + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + u32 bmcr, advert, tmp; + u32 advert2 = 0, tmp2 = 0; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32( + &advertising, cmd->link_modes.advertising); + + if ((advertising & (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full)) == 0) + return -EINVAL; + + /* advertise only what has been requested */ + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + if (mii->supports_gmii) { + advert2 = mii->mdio_read(dev, mii->phy_id, + MII_CTRL1000); + tmp2 = advert2 & + ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL); + } + tmp |= ethtool_adv_to_mii_adv_t(advertising); + + if (mii->supports_gmii) + tmp2 |= ethtool_adv_to_mii_ctrl1000_t(advertising); + if (advert != tmp) { + mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); + mii->advertising = tmp; + } + if ((mii->supports_gmii) && (advert2 != tmp2)) + mii->mdio_write(dev, mii->phy_id, MII_CTRL1000, tmp2); + + /* turn on autonegotiation, and force a renegotiate */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr); + + mii->force_media = 0; + } else { + u32 bmcr, tmp; + + /* turn off auto negotiation, set speed and duplexity */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | + BMCR_SPEED1000 | BMCR_FULLDPLX); + if (speed == SPEED_1000) + tmp |= BMCR_SPEED1000; + else if (speed == SPEED_100) + tmp |= BMCR_SPEED100; + if (cmd->base.duplex == DUPLEX_FULL) { + tmp |= BMCR_FULLDPLX; + mii->full_duplex = 1; + } else { + mii->full_duplex = 0; + } + if (bmcr != tmp) + mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); + + mii->force_media = 1; + } + return 0; +} + +/** * mii_check_gmii_support - check if the MII supports Gb interfaces * @mii: the MII interface */ @@ -466,7 +661,9 @@ MODULE_LICENSE("GPL"); EXPORT_SYMBOL(mii_link_ok); EXPORT_SYMBOL(mii_nway_restart); EXPORT_SYMBOL(mii_ethtool_gset); +EXPORT_SYMBOL(mii_ethtool_get_link_ksettings); EXPORT_SYMBOL(mii_ethtool_sset); +EXPORT_SYMBOL(mii_ethtool_set_link_ksettings); EXPORT_SYMBOL(mii_check_link); EXPORT_SYMBOL(mii_check_media); EXPORT_SYMBOL(mii_check_gmii_support); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index a9acf7156855..36877ba65516 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -433,6 +433,9 @@ static int ntb_netdev_probe(struct device *client_dev) ndev->netdev_ops = &ntb_netdev_ops; ndev->ethtool_ops = &ntb_ethtool_ops; + ndev->min_mtu = 0; + ndev->max_mtu = ETH_MAX_MTU; + dev->qp = ntb_transport_create_queue(ndev, client_dev, &ntb_netdev_handlers); if (!dev->qp) { diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 2651c8d8de2f..d361835b315d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -15,6 +15,19 @@ if PHYLIB config SWPHY bool +config LED_TRIGGER_PHY + bool "Support LED triggers for tracking link state" + depends on LEDS_TRIGGERS + ---help--- + Adds support for a set of LED trigger events per-PHY. Link + state change will trigger the events, for consumption by an + LED class driver. There are triggers for each link speed currently + supported by the phy, and are of the form: + <mii bus id>:<phy>:<speed> + + Where speed is in the form: + <Speed in megabits>Mbps or <Speed in gigabits>Gbps + comment "MDIO bus device drivers" config MDIO_BCM_IPROC @@ -204,7 +217,7 @@ config BROADCOM_PHY select BCM_NET_PHYLIB ---help--- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, - BCM5481 and BCM5482 PHYs. + BCM5481, BCM54810 and BCM5482 PHYs. config CICADA_PHY tristate "Cicada PHYs" @@ -264,6 +277,11 @@ config MARVELL_PHY ---help--- Currently has a driver for the 88E1011S +config MESON_GXL_PHY + tristate "Amlogic Meson GXL Internal PHY" + ---help--- + Currently has a driver for the Amlogic Meson GXL Internal PHY + config MICREL_PHY tristate "Micrel PHYs" ---help--- @@ -277,7 +295,7 @@ config MICROCHIP_PHY config MICROSEMI_PHY tristate "Microsemi PHYs" ---help--- - Currently supports the VSC8531 and VSC8541 PHYs + Currently supports VSC8530, VSC8531, VSC8540 and VSC8541 PHYs config NATIONAL_PHY tristate "National Semiconductor PHYs" diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index e58667d111e7..356859ac7c18 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,6 +2,7 @@ libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o libphy-$(CONFIG_SWPHY) += swphy.o +libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o obj-$(CONFIG_PHYLIB) += libphy.o @@ -41,6 +42,7 @@ obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MICROCHIP_PHY) += microchip.o diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index 09b0b0aa8d68..e8ae50e1255e 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -21,6 +21,8 @@ #define PHY_ID_AQ1202 0x03a1b445 #define PHY_ID_AQ2104 0x03a1b460 #define PHY_ID_AQR105 0x03a1b4a2 +#define PHY_ID_AQR106 0x03a1b4d0 +#define PHY_ID_AQR107 0x03a1b4e0 #define PHY_ID_AQR405 0x03a1b4b0 #define PHY_AQUANTIA_FEATURES (SUPPORTED_10000baseT_Full | \ @@ -154,6 +156,30 @@ static struct phy_driver aquantia_driver[] = { .read_status = aquantia_read_status, }, { + .phy_id = PHY_ID_AQR106, + .phy_id_mask = 0xfffffff0, + .name = "Aquantia AQR106", + .features = PHY_AQUANTIA_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .aneg_done = aquantia_aneg_done, + .config_aneg = aquantia_config_aneg, + .config_intr = aquantia_config_intr, + .ack_interrupt = aquantia_ack_interrupt, + .read_status = aquantia_read_status, +}, +{ + .phy_id = PHY_ID_AQR107, + .phy_id_mask = 0xfffffff0, + .name = "Aquantia AQR107", + .features = PHY_AQUANTIA_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .aneg_done = aquantia_aneg_done, + .config_aneg = aquantia_config_aneg, + .config_intr = aquantia_config_intr, + .ack_interrupt = aquantia_ack_interrupt, + .read_status = aquantia_read_status, +}, +{ .phy_id = PHY_ID_AQR405, .phy_id_mask = 0xfffffff0, .name = "Aquantia AQR405", @@ -173,6 +199,8 @@ static struct mdio_device_id __maybe_unused aquantia_tbl[] = { { PHY_ID_AQ1202, 0xfffffff0 }, { PHY_ID_AQ2104, 0xfffffff0 }, { PHY_ID_AQR105, 0xfffffff0 }, + { PHY_ID_AQR106, 0xfffffff0 }, + { PHY_ID_AQR107, 0xfffffff0 }, { PHY_ID_AQR405, 0xfffffff0 }, { } }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index a52b560e428b..c1e52b9dc58d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -63,6 +63,7 @@ #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 +#define AT803X_PHY_ID_MASK 0xffffffef MODULE_DESCRIPTION("Atheros 803x PHY driver"); MODULE_AUTHOR("Matus Ujhelyi"); @@ -398,7 +399,7 @@ static struct phy_driver at803x_driver[] = { /* ATHEROS 8035 */ .phy_id = ATH8035_PHY_ID, .name = "Atheros 8035 ethernet", - .phy_id_mask = 0xffffffef, + .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, @@ -415,7 +416,7 @@ static struct phy_driver at803x_driver[] = { /* ATHEROS 8030 */ .phy_id = ATH8030_PHY_ID, .name = "Atheros 8030 ethernet", - .phy_id_mask = 0xffffffef, + .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, @@ -433,7 +434,7 @@ static struct phy_driver at803x_driver[] = { /* ATHEROS 8031 */ .phy_id = ATH8031_PHY_ID, .name = "Atheros 8031 ethernet", - .phy_id_mask = 0xffffffef, + .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, @@ -452,9 +453,9 @@ static struct phy_driver at803x_driver[] = { module_phy_driver(at803x_driver); static struct mdio_device_id __maybe_unused atheros_tbl[] = { - { ATH8030_PHY_ID, 0xffffffef }, - { ATH8031_PHY_ID, 0xffffffef }, - { ATH8035_PHY_ID, 0xffffffef }, + { ATH8030_PHY_ID, AT803X_PHY_ID_MASK }, + { ATH8031_PHY_ID, AT803X_PHY_ID_MASK }, + { ATH8035_PHY_ID, AT803X_PHY_ID_MASK }, { } }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 870327efccf7..b1e32e9be1b3 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/brcmphy.h> - +#include <linux/of.h> #define BRCM_PHY_MODEL(phydev) \ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) @@ -30,11 +30,49 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); +static int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum) +{ + /* The register must be written to both the Shadow Register Select and + * the Shadow Read Register Selector + */ + phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | + regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); + return phy_read(phydev, MII_BCM54XX_AUX_CTL); +} + static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val) { return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val); } +static int bcm54810_config(struct phy_device *phydev) +{ + int rc, val; + + val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL); + val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN; + rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL, + val); + if (rc < 0) + return rc; + + val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + val |= MII_BCM54XX_AUXCTL_MISC_WREN; + rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + val); + if (rc < 0) + return rc; + + val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); + val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + if (rc < 0) + return rc; + + return 0; +} + /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */ static int bcm50610_a0_workaround(struct phy_device *phydev) { @@ -207,6 +245,12 @@ static int bcm54xx_config_init(struct phy_device *phydev) (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE)) bcm54xx_adjust_rxrefclk(phydev); + if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) { + err = bcm54810_config(phydev); + if (err) + return err; + } + bcm54xx_phydsp_config(phydev); return 0; @@ -304,6 +348,7 @@ static int bcm5482_read_status(struct phy_device *phydev) static int bcm5481_config_aneg(struct phy_device *phydev) { + struct device_node *np = phydev->mdio.dev.of_node; int ret; /* Aneg firsly. */ @@ -334,6 +379,49 @@ static int bcm5481_config_aneg(struct phy_device *phydev) phy_write(phydev, 0x18, reg); } + if (of_property_read_bool(np, "enet-phy-lane-swap")) { + /* Lane Swap - Undocumented register...magic! */ + ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9, + 0x11B); + if (ret < 0) + return ret; + } + + return ret; +} + +static int bcm54612e_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* First, auto-negotiate. */ + ret = genphy_config_aneg(phydev); + + /* Clear TX internal delay unless requested. */ + if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && + (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) { + /* Disable TXD to GTXCLK clock delay (default set) */ + /* Bit 9 is the only field in shadow register 00011 */ + bcm_phy_write_shadow(phydev, 0x03, 0); + } + + /* Clear RX internal delay unless requested. */ + if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && + (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) { + u16 reg; + + /* Errata: reads require filling in the write selector field */ + bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC); + reg = phy_read(phydev, MII_BCM54XX_AUX_CTL); + /* Disable RXD to RXC delay (default set) */ + reg &= ~MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW; + /* Clear shadow selector field */ + reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK; + bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_WREN | reg); + } + return ret; } @@ -485,6 +573,18 @@ static struct phy_driver broadcom_drivers[] = { .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { + .phy_id = PHY_ID_BCM54612E, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54612E", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = bcm54612e_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, +}, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", @@ -521,6 +621,18 @@ static struct phy_driver broadcom_drivers[] = { .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { + .phy_id = PHY_ID_BCM54810, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54810", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = bcm5481_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, +}, { .phy_id = PHY_ID_BCM5482, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5482", @@ -600,9 +712,11 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5411, 0xfffffff0 }, { PHY_ID_BCM5421, 0xfffffff0 }, { PHY_ID_BCM5461, 0xfffffff0 }, + { PHY_ID_BCM54612E, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { PHY_ID_BCM5481, 0xfffffff0 }, + { PHY_ID_BCM54810, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 7a240fce3a7e..e2460a57e4b1 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -375,7 +375,7 @@ static int periodic_output(struct dp83640_clock *clock, /* ptp clock methods */ -static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ptp_dp83640_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct dp83640_clock *clock = container_of(ptp, struct dp83640_clock, caps); @@ -384,13 +384,13 @@ static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb) int neg_adj = 0; u16 hi, lo; - if (ppb < 0) { + if (scaled_ppm < 0) { neg_adj = 1; - ppb = -ppb; + scaled_ppm = -scaled_ppm; } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); + rate = scaled_ppm; + rate <<= 13; + rate = div_u64(rate, 15625); hi = (rate >> 16) & PTP_RATE_HI_MASK; if (neg_adj) @@ -1035,7 +1035,7 @@ static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus) clock->caps.n_per_out = N_PER_OUT; clock->caps.n_pins = DP83640_N_PINS; clock->caps.pps = 0; - clock->caps.adjfreq = ptp_dp83640_adjfreq; + clock->caps.adjfine = ptp_dp83640_adjfine; clock->caps.adjtime = ptp_dp83640_adjtime; clock->caps.gettime64 = ptp_dp83640_gettime; clock->caps.settime64 = ptp_dp83640_settime; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 91177a4a32ad..1b639242f9e2 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -33,6 +33,7 @@ /* Extended Registers */ #define DP83867_RGMIICTL 0x0032 #define DP83867_RGMIIDCTL 0x0086 +#define DP83867_IO_MUX_CFG 0x0170 #define DP83867_SW_RESET BIT(15) #define DP83867_SW_RESTART BIT(14) @@ -62,10 +63,17 @@ /* RGMIIDCTL bits */ #define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4 +/* IO_MUX_CFG bits */ +#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL 0x1f + +#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 +#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f + struct dp83867_private { int rx_id_delay; int tx_id_delay; int fifo_depth; + int io_impedance; }; static int dp83867_ack_interrupt(struct phy_device *phydev) @@ -111,6 +119,14 @@ static int dp83867_of_init(struct phy_device *phydev) if (!of_node) return -ENODEV; + dp83867->io_impedance = -EINVAL; + + /* Optional configuration */ + if (of_property_read_bool(of_node, "ti,max-output-impedance")) + dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; + else if (of_property_read_bool(of_node, "ti,min-output-impedance")) + dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN; + ret = of_property_read_u32(of_node, "ti,rx-internal-delay", &dp83867->rx_id_delay); if (ret) @@ -184,6 +200,18 @@ static int dp83867_config_init(struct phy_device *phydev) phy_write_mmd_indirect(phydev, DP83867_RGMIIDCTL, DP83867_DEVADDR, delay); + + if (dp83867->io_impedance >= 0) { + val = phy_read_mmd_indirect(phydev, DP83867_IO_MUX_CFG, + DP83867_DEVADDR); + + val &= ~DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL; + val |= dp83867->io_impedance & + DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL; + + phy_write_mmd_indirect(phydev, DP83867_IO_MUX_CFG, + DP83867_DEVADDR, val); + } } return 0; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index c2dcf02df202..fa31f50824d3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -361,7 +361,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev) static int marvell_of_reg_init(struct phy_device *phydev) { const __be32 *paddr; - int len, i, saved_page, current_page, page_changed, ret; + int len, i, saved_page, current_page, ret; if (!phydev->mdio.dev.of_node) return 0; @@ -374,7 +374,6 @@ static int marvell_of_reg_init(struct phy_device *phydev) saved_page = phy_read(phydev, MII_MARVELL_PHY_PAGE); if (saved_page < 0) return saved_page; - page_changed = 0; current_page = saved_page; ret = 0; @@ -388,7 +387,6 @@ static int marvell_of_reg_init(struct phy_device *phydev) if (reg_page != current_page) { current_page = reg_page; - page_changed = 1; ret = phy_write(phydev, MII_MARVELL_PHY_PAGE, reg_page); if (ret < 0) goto err; @@ -411,7 +409,7 @@ static int marvell_of_reg_init(struct phy_device *phydev) } err: - if (page_changed) { + if (current_page != saved_page) { i = phy_write(phydev, MII_MARVELL_PHY_PAGE, saved_page); if (ret == 0) ret = i; diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c index d0bed52c8d16..6a33646bdf05 100644 --- a/drivers/net/phy/mdio-mux-mmioreg.c +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -21,7 +21,8 @@ struct mdio_mux_mmioreg_state { void *mux_handle; phys_addr_t phys; - uint8_t mask; + unsigned int iosize; + unsigned int mask; }; /* @@ -47,17 +48,47 @@ static int mdio_mux_mmioreg_switch_fn(int current_child, int desired_child, struct mdio_mux_mmioreg_state *s = data; if (current_child ^ desired_child) { - void __iomem *p = ioremap(s->phys, 1); - uint8_t x, y; - + void __iomem *p = ioremap(s->phys, s->iosize); if (!p) return -ENOMEM; - x = ioread8(p); - y = (x & ~s->mask) | desired_child; - if (x != y) { - iowrite8((x & ~s->mask) | desired_child, p); - pr_debug("%s: %02x -> %02x\n", __func__, x, y); + switch (s->iosize) { + case sizeof(uint8_t): { + uint8_t x, y; + + x = ioread8(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite8((x & ~s->mask) | desired_child, p); + pr_debug("%s: %02x -> %02x\n", __func__, x, y); + } + + break; + } + case sizeof(uint16_t): { + uint16_t x, y; + + x = ioread16(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite16((x & ~s->mask) | desired_child, p); + pr_debug("%s: %04x -> %04x\n", __func__, x, y); + } + + break; + } + case sizeof(uint32_t): { + uint32_t x, y; + + x = ioread32(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite32((x & ~s->mask) | desired_child, p); + pr_debug("%s: %08x -> %08x\n", __func__, x, y); + } + + break; + } } iounmap(p); @@ -88,8 +119,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) } s->phys = res.start; - if (resource_size(&res) != sizeof(uint8_t)) { - dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + s->iosize = resource_size(&res); + if (s->iosize != sizeof(uint8_t) && + s->iosize != sizeof(uint16_t) && + s->iosize != sizeof(uint32_t)) { + dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n"); return -EINVAL; } @@ -98,8 +132,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) dev_err(&pdev->dev, "missing or invalid mux-mask property\n"); return -ENODEV; } - if (be32_to_cpup(iprop) > 255) { - dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + if (be32_to_cpup(iprop) >= BIT(s->iosize * 8)) { + dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n"); return -EINVAL; } s->mask = be32_to_cpup(iprop); diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 9c88e6749b9a..43c8fd46504b 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -144,7 +144,7 @@ int mdio_driver_register(struct mdio_driver *drv) struct mdio_driver_common *mdiodrv = &drv->mdiodrv; int retval; - pr_info("mdio_driver_register: %s\n", mdiodrv->driver.name); + pr_debug("mdio_driver_register: %s\n", mdiodrv->driver.name); mdiodrv->driver.bus = &mdio_bus_type; mdiodrv->driver.probe = mdio_probe; diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c new file mode 100644 index 000000000000..1ea69b7585d9 --- /dev/null +++ b/drivers/net/phy/meson-gxl.c @@ -0,0 +1,81 @@ +/* + * Amlogic Meson GXL Internal PHY Driver + * + * Copyright (C) 2015 Amlogic, Inc. All rights reserved. + * Copyright (C) 2016 BayLibre, SAS. All rights reserved. + * Author: Neil Armstrong <narmstrong@baylibre.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mii.h> +#include <linux/ethtool.h> +#include <linux/phy.h> +#include <linux/netdevice.h> + +static int meson_gxl_config_init(struct phy_device *phydev) +{ + /* Enable Analog and DSP register Bank access by */ + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x14, 0x0400); + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x14, 0x0400); + + /* Write Analog register 23 */ + phy_write(phydev, 0x17, 0x8E0D); + phy_write(phydev, 0x14, 0x4417); + + /* Enable fractional PLL */ + phy_write(phydev, 0x17, 0x0005); + phy_write(phydev, 0x14, 0x5C1B); + + /* Program fraction FR_PLL_DIV1 */ + phy_write(phydev, 0x17, 0x029A); + phy_write(phydev, 0x14, 0x5C1D); + + /* Program fraction FR_PLL_DIV1 */ + phy_write(phydev, 0x17, 0xAAAA); + phy_write(phydev, 0x14, 0x5C1C); + + return 0; +} + +static struct phy_driver meson_gxl_phy[] = { + { + .phy_id = 0x01814400, + .phy_id_mask = 0xfffffff0, + .name = "Meson GXL Internal PHY", + .features = PHY_BASIC_FEATURES, + .flags = PHY_IS_INTERNAL, + .config_init = meson_gxl_config_init, + .config_aneg = genphy_config_aneg, + .aneg_done = genphy_aneg_done, + .read_status = genphy_read_status, + .suspend = genphy_suspend, + .resume = genphy_resume, + }, +}; + +static struct mdio_device_id __maybe_unused meson_gxl_tbl[] = { + { 0x01814400, 0xfffffff0 }, + { } +}; + +module_phy_driver(meson_gxl_phy); + +MODULE_DEVICE_TABLE(mdio, meson_gxl_tbl); + +MODULE_DESCRIPTION("Amlogic Meson GXL Internal PHY driver"); +MODULE_AUTHOR("Baoqi wang"); +MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 77a6671d572e..d0026ab8a220 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -12,7 +12,6 @@ #include <linux/mii.h> #include <linux/phy.h> #include <linux/of.h> -#include <dt-bindings/net/mscc-phy-vsc8531.h> #include <linux/netdevice.h> enum rgmii_rx_clock_delay { @@ -66,25 +65,33 @@ enum rgmii_rx_clock_delay { #define SECURE_ON_PASSWD_LEN_4 0x4000 /* Microsemi PHY ID's */ +#define PHY_ID_VSC8530 0x00070560 #define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8540 0x00070760 #define PHY_ID_VSC8541 0x00070770 -struct edge_rate_table { - u16 vddmac; - int slowdown[MSCC_SLOWDOWN_MAX]; -}; +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_3300 3300 -struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = { - {3300, { 0, -2, -4, -7, -10, -17, -29, -53} }, - {2500, { 0, -3, -6, -10, -14, -23, -37, -63} }, - {1800, { 0, -5, -9, -16, -23, -35, -52, -76} }, - {1500, { 0, -6, -14, -21, -29, -42, -58, -77} }, +struct vsc8531_private { + int rate_magic; }; -struct vsc8531_private { - u8 edge_slowdown; +#ifdef CONFIG_OF_MDIO +struct vsc8531_edge_rate_table { u16 vddmac; + u8 slowdown[8]; +}; + +static const struct vsc8531_edge_rate_table edge_table[] = { + {MSCC_VDDMAC_3300, { 0, 2, 4, 7, 10, 17, 29, 53} }, + {MSCC_VDDMAC_2500, { 0, 3, 6, 10, 14, 23, 37, 63} }, + {MSCC_VDDMAC_1800, { 0, 5, 9, 16, 23, 35, 52, 76} }, + {MSCC_VDDMAC_1500, { 0, 6, 14, 21, 29, 42, 58, 77} }, }; +#endif /* CONFIG_OF_MDIO */ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) { @@ -205,29 +212,43 @@ out_unlock: mutex_unlock(&phydev->lock); } -static u8 edge_rate_magic_get(u16 vddmac, - int slowdown) +#ifdef CONFIG_OF_MDIO +static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) { - int rc = (MSCC_SLOWDOWN_MAX - 1); - u8 vdd; u8 sd; + u16 vdd; + int rc, i, j; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + u8 sd_array_size = ARRAY_SIZE(edge_table[0].slowdown); - for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) { - if (edge_table[vdd].vddmac == vddmac) { - for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) { - if (edge_table[vdd].slowdown[sd] <= slowdown) { - rc = (MSCC_SLOWDOWN_MAX - sd - 1); - break; - } - } - } - } + if (!of_node) + return -ENODEV; - return rc; + rc = of_property_read_u16(of_node, "vsc8531,vddmac", &vdd); + if (rc != 0) + vdd = MSCC_VDDMAC_3300; + + rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", &sd); + if (rc != 0) + sd = 0; + + for (i = 0; i < ARRAY_SIZE(edge_table); i++) + if (edge_table[i].vddmac == vdd) + for (j = 0; j < sd_array_size; j++) + if (edge_table[i].slowdown[j] == sd) + return (sd_array_size - j - 1); + + return -EINVAL; +} +#else +static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) +{ + return 0; } +#endif /* CONFIG_OF_MDIO */ -static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, - u8 edge_rate) +static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, u8 edge_rate) { int rc; u16 reg_val; @@ -308,45 +329,10 @@ out_unlock: return rc; } -#ifdef CONFIG_OF_MDIO -static int vsc8531_of_init(struct phy_device *phydev) -{ - int rc; - struct vsc8531_private *vsc8531 = phydev->priv; - struct device *dev = &phydev->mdio.dev; - struct device_node *of_node = dev->of_node; - - if (!of_node) - return -ENODEV; - - rc = of_property_read_u16(of_node, "vsc8531,vddmac", - &vsc8531->vddmac); - if (rc == -EINVAL) - vsc8531->vddmac = MSCC_VDDMAC_3300; - rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", - &vsc8531->edge_slowdown); - if (rc == -EINVAL) - vsc8531->edge_slowdown = 0; - - rc = 0; - return rc; -} -#else -static int vsc8531_of_init(struct phy_device *phydev) -{ - return 0; -} -#endif /* CONFIG_OF_MDIO */ - static int vsc85xx_config_init(struct phy_device *phydev) { int rc; struct vsc8531_private *vsc8531 = phydev->priv; - u8 edge_rate; - - rc = vsc8531_of_init(phydev); - if (rc) - return rc; rc = vsc85xx_default_config(phydev); if (rc) @@ -356,9 +342,7 @@ static int vsc85xx_config_init(struct phy_device *phydev) if (rc) return rc; - edge_rate = edge_rate_magic_get(vsc8531->vddmac, - -(int)vsc8531->edge_slowdown); - rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate); + rc = vsc85xx_edge_rate_cntl_set(phydev, vsc8531->rate_magic); if (rc) return rc; @@ -396,20 +380,46 @@ static int vsc85xx_config_intr(struct phy_device *phydev) static int vsc85xx_probe(struct phy_device *phydev) { + int rate_magic; struct vsc8531_private *vsc8531; + rate_magic = vsc85xx_edge_rate_magic_get(phydev); + if (rate_magic < 0) + return rate_magic; + vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); if (!vsc8531) return -ENOMEM; phydev->priv = vsc8531; + vsc8531->rate_magic = rate_magic; + return 0; } /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { + .phy_id = PHY_ID_VSC8530, + .name = "Microsemi FE VSC8530", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, + .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, +}, +{ .phy_id = PHY_ID_VSC8531, .name = "Microsemi VSC8531", .phy_id_mask = 0xfffffff0, @@ -424,9 +434,28 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, - .probe = &vsc85xx_probe, - .set_wol = &vsc85xx_wol_set, - .get_wol = &vsc85xx_wol_get, + .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, +}, +{ + .phy_id = PHY_ID_VSC8540, + .name = "Microsemi FE VSC8540 SyncE", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, + .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, }, { .phy_id = PHY_ID_VSC8541, @@ -443,9 +472,9 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, - .probe = &vsc85xx_probe, - .set_wol = &vsc85xx_wol_set, - .get_wol = &vsc85xx_wol_get, + .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, } }; @@ -453,7 +482,9 @@ static struct phy_driver vsc85xx_driver[] = { module_phy_driver(vsc85xx_driver); static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = { + { PHY_ID_VSC8530, 0xfffffff0, }, { PHY_ID_VSC8531, 0xfffffff0, }, + { PHY_ID_VSC8540, 0xfffffff0, }, { PHY_ID_VSC8541, 0xfffffff0, }, { } }; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f424b867f73e..e6dd222fddb1 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -143,13 +143,14 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) * Returns > 0 on success or < 0 on error. 0 means that auto-negotiation * is still pending. */ -static inline int phy_aneg_done(struct phy_device *phydev) +int phy_aneg_done(struct phy_device *phydev) { if (phydev->drv->aneg_done) return phydev->drv->aneg_done(phydev); return genphy_aneg_done(phydev); } +EXPORT_SYMBOL(phy_aneg_done); /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value @@ -261,6 +262,41 @@ static inline unsigned int phy_find_valid(unsigned int idx, u32 features) } /** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and fills the speeds + * buffer with the supported speeds. If speeds buffer is too small to contain + * all currently supported speeds, will return as many speeds as can fit. + */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size) +{ + unsigned int count = 0; + unsigned int idx = 0; + + while (idx < MAX_NUM_SETTINGS && count < size) { + idx = phy_find_valid(idx, phy->supported); + + if (!(settings[idx].setting & phy->supported)) + break; + + /* Assumes settings are grouped by speed */ + if ((count == 0) || + (speeds[count - 1] != settings[idx].speed)) { + speeds[count] = settings[idx].speed; + count++; + } + idx++; + } + + return count; +} + +/** * phy_check_valid - check if there is a valid PHY setting which matches * speed, duplex, and feature mask * @speed: speed to match @@ -664,7 +700,7 @@ static void phy_error(struct phy_device *phydev) * @phy_dat: phy_device pointer * * Description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. + * interrupts, and uses phy_change to handle the interrupt. */ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { @@ -673,15 +709,10 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (PHY_HALTED == phydev->state) return IRQ_NONE; /* It can't be ours. */ - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. - */ disable_irq_nosync(irq); atomic_inc(&phydev->irq_disable); - queue_work(system_power_efficient_wq, &phydev->phy_queue); + phy_change(phydev); return IRQ_HANDLED; } @@ -739,10 +770,9 @@ phy_err: int phy_start_interrupts(struct phy_device *phydev) { atomic_set(&phydev->irq_disable, 0); - if (request_irq(phydev->irq, phy_interrupt, - IRQF_SHARED, - "phy_interrupt", - phydev) < 0) { + if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, + IRQF_ONESHOT | IRQF_SHARED, + phydev_name(phydev), phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", phydev->mdio.bus->name, phydev->irq); phydev->irq = PHY_POLL; @@ -766,12 +796,6 @@ int phy_stop_interrupts(struct phy_device *phydev) free_irq(phydev->irq, phydev); - /* Cannot call flush_scheduled_work() here as desired because - * of rtnl_lock(), but we do not really care about what would - * be done, except from enable_irq(), so cancel any work - * possibly pending and take care of the matter below. - */ - cancel_work_sync(&phydev->phy_queue); /* If work indeed has been cancelled, disable_irq() will have * been left unbalanced from phy_interrupt() and enable_irq() * has to be called so that other devices on the line work. @@ -784,14 +808,11 @@ int phy_stop_interrupts(struct phy_device *phydev) EXPORT_SYMBOL(phy_stop_interrupts); /** - * phy_change - Scheduled by the phy_interrupt/timer to handle PHY changes - * @work: work_struct that describes the work to be done + * phy_change - Called by the phy_interrupt to handle PHY changes + * @phydev: phy_device struct that interrupted */ -void phy_change(struct work_struct *work) +void phy_change(struct phy_device *phydev) { - struct phy_device *phydev = - container_of(work, struct phy_device, phy_queue); - if (phy_interrupt_is_valid(phydev)) { if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) @@ -833,6 +854,18 @@ phy_err: } /** + * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes + * @work: work_struct that describes the work to be done + */ +void phy_change_work(struct work_struct *work) +{ + struct phy_device *phydev = + container_of(work, struct phy_device, phy_queue); + + phy_change(phydev); +} + +/** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct */ @@ -911,6 +944,12 @@ void phy_start(struct phy_device *phydev) } EXPORT_SYMBOL(phy_start); +static void phy_adjust_link(struct phy_device *phydev) +{ + phydev->adjust_link(phydev->attached_dev); + phy_led_trigger_change_speed(phydev); +} + /** * phy_state_machine - Handle the state machine * @work: work_struct that describes the work to be done @@ -953,7 +992,7 @@ void phy_state_machine(struct work_struct *work) if (!phydev->link) { phydev->state = PHY_NOLINK; netif_carrier_off(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); break; } @@ -966,7 +1005,7 @@ void phy_state_machine(struct work_struct *work) if (err > 0) { phydev->state = PHY_RUNNING; netif_carrier_on(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); } else if (0 == phydev->link_timeout--) needs_aneg = true; @@ -993,7 +1032,7 @@ void phy_state_machine(struct work_struct *work) } phydev->state = PHY_RUNNING; netif_carrier_on(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); } break; case PHY_FORCING: @@ -1009,7 +1048,7 @@ void phy_state_machine(struct work_struct *work) needs_aneg = true; } - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); break; case PHY_RUNNING: /* Only register a CHANGE if we are polling and link changed @@ -1038,7 +1077,7 @@ void phy_state_machine(struct work_struct *work) netif_carrier_off(phydev->attached_dev); } - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); if (phy_interrupt_is_valid(phydev)) err = phy_config_interrupt(phydev, @@ -1048,7 +1087,7 @@ void phy_state_machine(struct work_struct *work) if (phydev->link) { phydev->link = 0; netif_carrier_off(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); do_suspend = true; } break; @@ -1072,7 +1111,7 @@ void phy_state_machine(struct work_struct *work) } else { phydev->state = PHY_NOLINK; } - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); } else { phydev->state = PHY_AN; phydev->link_timeout = PHY_AN_TIMEOUT; @@ -1088,7 +1127,7 @@ void phy_state_machine(struct work_struct *work) } else { phydev->state = PHY_NOLINK; } - phydev->adjust_link(phydev->attached_dev); + phy_adjust_link(phydev); } break; } @@ -1116,6 +1155,15 @@ void phy_state_machine(struct work_struct *work) PHY_STATE_TIME * HZ); } +/** + * phy_mac_interrupt - MAC says the link has changed + * @phydev: phy_device struct with changed link + * @new_link: Link is Up/Down. + * + * Description: The MAC layer is able indicate there has been a change + * in the PHY link status. Set the new link status, and trigger the + * state machine, work a work queue. + */ void phy_mac_interrupt(struct phy_device *phydev, int new_link) { phydev->link = new_link; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1a4bf8acad78..9e8f048891bd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -30,6 +30,7 @@ #include <linux/mii.h> #include <linux/ethtool.h> #include <linux/phy.h> +#include <linux/phy_led_triggers.h> #include <linux/mdio.h> #include <linux/io.h> #include <linux/uaccess.h> @@ -347,7 +348,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mutex_init(&dev->lock); INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); - INIT_WORK(&dev->phy_queue, phy_change); + INIT_WORK(&dev->phy_queue, phy_change_work); /* Request the appropriate module unconditionally; don't * bother trying to do so only if it isn't already loaded, @@ -917,6 +918,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, else phy_resume(phydev); + phy_led_triggers_register(phydev); + return err; error: @@ -991,6 +994,8 @@ void phy_detach(struct phy_device *phydev) } } + phy_led_triggers_unregister(phydev); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c new file mode 100644 index 000000000000..cda600a1b766 --- /dev/null +++ b/drivers/net/phy/phy_led_triggers.c @@ -0,0 +1,136 @@ +/* Copyright (C) 2016 National Instruments Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/leds.h> +#include <linux/phy.h> +#include <linux/netdevice.h> + +static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy, + unsigned int speed) +{ + unsigned int i; + + for (i = 0; i < phy->phy_num_led_triggers; i++) { + if (phy->phy_led_triggers[i].speed == speed) + return &phy->phy_led_triggers[i]; + } + return NULL; +} + +void phy_led_trigger_change_speed(struct phy_device *phy) +{ + struct phy_led_trigger *plt; + + if (!phy->link) + goto out_change_speed; + + if (phy->speed == 0) + return; + + plt = phy_speed_to_led_trigger(phy, phy->speed); + if (!plt) { + netdev_alert(phy->attached_dev, + "No phy led trigger registered for speed(%d)\n", + phy->speed); + goto out_change_speed; + } + + if (plt != phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); + led_trigger_event(&plt->trigger, LED_FULL); + phy->last_triggered = plt; + } + return; + +out_change_speed: + if (phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, + LED_OFF); + phy->last_triggered = NULL; + } +} +EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); + +static int phy_led_trigger_register(struct phy_device *phy, + struct phy_led_trigger *plt, + unsigned int speed) +{ + char name_suffix[PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE]; + + plt->speed = speed; + + if (speed < SPEED_1000) + snprintf(name_suffix, sizeof(name_suffix), "%dMbps", speed); + else if (speed == SPEED_2500) + snprintf(name_suffix, sizeof(name_suffix), "2.5Gbps"); + else + snprintf(name_suffix, sizeof(name_suffix), "%dGbps", + DIV_ROUND_CLOSEST(speed, 1000)); + + snprintf(plt->name, sizeof(plt->name), PHY_ID_FMT ":%s", + phy->mdio.bus->id, phy->mdio.addr, name_suffix); + plt->trigger.name = plt->name; + + return led_trigger_register(&plt->trigger); +} + +static void phy_led_trigger_unregister(struct phy_led_trigger *plt) +{ + led_trigger_unregister(&plt->trigger); +} + +int phy_led_triggers_register(struct phy_device *phy) +{ + int i, err; + unsigned int speeds[50]; + + phy->phy_num_led_triggers = phy_supported_speeds(phy, speeds, + ARRAY_SIZE(speeds)); + if (!phy->phy_num_led_triggers) + return 0; + + phy->phy_led_triggers = devm_kzalloc(&phy->mdio.dev, + sizeof(struct phy_led_trigger) * + phy->phy_num_led_triggers, + GFP_KERNEL); + if (!phy->phy_led_triggers) + return -ENOMEM; + + for (i = 0; i < phy->phy_num_led_triggers; i++) { + err = phy_led_trigger_register(phy, &phy->phy_led_triggers[i], + speeds[i]); + if (err) + goto out_unreg; + } + + phy->last_triggered = NULL; + phy_led_trigger_change_speed(phy); + + return 0; +out_unreg: + while (i--) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); + return err; +} +EXPORT_SYMBOL_GPL(phy_led_triggers_register); + +void phy_led_triggers_unregister(struct phy_device *phy) +{ + int i; + + for (i = 0; i < phy->phy_num_led_triggers; i++) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + + devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); +} +EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 9c4b41a4df7d..3c55ea357f35 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -270,7 +270,6 @@ static const struct net_device_ops plip_netdev_ops = { .ndo_stop = plip_close, .ndo_start_xmit = plip_tx_packet, .ndo_do_ioctl = plip_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index a31f4610b493..300bb1479b3a 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -466,17 +466,6 @@ static void rionet_set_msglevel(struct net_device *ndev, u32 value) rnet->msg_enable = value; } -static int rionet_change_mtu(struct net_device *ndev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) { - printk(KERN_ERR "%s: Invalid MTU size %d\n", - ndev->name, new_mtu); - return -EINVAL; - } - ndev->mtu = new_mtu; - return 0; -} - static const struct ethtool_ops rionet_ethtool_ops = { .get_drvinfo = rionet_get_drvinfo, .get_msglevel = rionet_get_msglevel, @@ -488,7 +477,6 @@ static const struct net_device_ops rionet_netdev_ops = { .ndo_open = rionet_open, .ndo_stop = rionet_close, .ndo_start_xmit = rionet_start_xmit, - .ndo_change_mtu = rionet_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; @@ -525,6 +513,9 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) ndev->netdev_ops = &rionet_netdev_ops; ndev->mtu = RIONET_MAX_MTU; + /* MTU range: 68 - 4082 */ + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = RIONET_MAX_MTU; ndev->features = NETIF_F_LLTX; SET_NETDEV_DEV(ndev, &mport->dev); ndev->ethtool_ops = &rionet_ethtool_ops; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index aad0b59d41e3..8b8b53259783 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -141,7 +141,6 @@ static const struct net_device_ops sb1000_netdev_ops = { .ndo_start_xmit = sb1000_start_xmit, .ndo_do_ioctl = sb1000_dev_ioctl, .ndo_stop = sb1000_close, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 9ed6d1c1ee45..7e933d8ff811 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -561,12 +561,7 @@ static int sl_change_mtu(struct net_device *dev, int new_mtu) { struct slip *sl = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > 65534) - return -EINVAL; - - if (new_mtu != dev->mtu) - return sl_realloc_bufs(sl, new_mtu); - return 0; + return sl_realloc_bufs(sl, new_mtu); } /* Netdevice get statistics request */ @@ -663,6 +658,10 @@ static void sl_setup(struct net_device *dev) dev->addr_len = 0; dev->tx_queue_len = 10; + /* MTU range: 68 - 65534 */ + dev->min_mtu = 68; + dev->max_mtu = 65534; + /* New-style flags. */ dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a380649bf6b5..bdc58567d10e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2150,13 +2150,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { * Generic netlink custom interface ***********************************/ -static struct genl_family team_nl_family = { - .id = GENL_ID_GENERATE, - .name = TEAM_GENL_NAME, - .version = TEAM_GENL_VERSION, - .maxattr = TEAM_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family team_nl_family; static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = { [TEAM_ATTR_UNSPEC] = { .type = NLA_UNSPEC, }, @@ -2746,6 +2740,18 @@ static const struct genl_multicast_group team_nl_mcgrps[] = { { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, }; +static struct genl_family team_nl_family __ro_after_init = { + .name = TEAM_GENL_NAME, + .version = TEAM_GENL_VERSION, + .maxattr = TEAM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = team_nl_ops, + .n_ops = ARRAY_SIZE(team_nl_ops), + .mcgrps = team_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(team_nl_mcgrps), +}; + static int team_nl_send_multicast(struct sk_buff *skb, struct team *team, u32 portid) { @@ -2767,10 +2773,9 @@ static int team_nl_send_event_port_get(struct team *team, port); } -static int team_nl_init(void) +static int __init team_nl_init(void) { - return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops, - team_nl_mcgrps); + return genl_register_family(&team_nl_family); } static void team_nl_fini(void) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8093e39ae263..1588469844e7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -925,18 +925,6 @@ static void tun_net_mclist(struct net_device *dev) */ } -#define MIN_MTU 68 -#define MAX_MTU 65535 - -static int -tun_net_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static netdev_features_t tun_net_fix_features(struct net_device *dev, netdev_features_t features) { @@ -1014,7 +1002,6 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, - .ndo_change_mtu = tun_net_change_mtu, .ndo_fix_features = tun_net_fix_features, .ndo_select_queue = tun_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1029,7 +1016,6 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, - .ndo_change_mtu = tun_net_change_mtu, .ndo_fix_features = tun_net_fix_features, .ndo_set_rx_mode = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, @@ -1062,6 +1048,9 @@ static void tun_flow_uninit(struct tun_struct *tun) tun_flow_flush(tun); } +#define MIN_MTU 68 +#define MAX_MTU 65535 + /* Initialize net device. */ static void tun_net_init(struct net_device *dev) { @@ -1092,6 +1081,9 @@ static void tun_net_init(struct net_device *dev) break; } + + dev->min_mtu = MIN_MTU; + dev->max_mtu = MAX_MTU - dev->hard_header_len; } /* Character device part */ @@ -1993,7 +1985,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int le; int ret; - if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -2013,7 +2005,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, rtnl_lock(); tun = __tun_get(tfile); - if (cmd == TUNSETIFF && !tun) { + if (cmd == TUNSETIFF) { + ret = -EEXIST; + if (tun) + goto unlock; + ifr.ifr_name[IFNAMSIZ-1] = '\0'; ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index cce24950a0ab..7363cc5dd1bf 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1026,9 +1026,6 @@ static int ax88178_change_mtu(struct net_device *net, int new_mtu) netdev_dbg(dev->net, "ax88178_change_mtu() new_mtu=%d\n", new_mtu); - if (new_mtu <= 0 || ll_mtu > 16384) - return -EINVAL; - if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; @@ -1081,6 +1078,7 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->netdev_ops = &ax88178_netdev_ops; dev->net->ethtool_ops = &ax88178_ethtool_ops; + dev->net->max_mtu = 16384 - (dev->net->hard_header_len + 4); /* Blink LEDS so users know driver saw dongle */ asix_sw_reset(dev, 0, 0); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 8a6675d92b98..a3a7db0702d8 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -907,9 +907,6 @@ static int ax88179_change_mtu(struct net_device *net, int new_mtu) struct usbnet *dev = netdev_priv(net); u16 tmp16; - if (new_mtu <= 0 || new_mtu > 4088) - return -EINVAL; - net->mtu = new_mtu; dev->hard_mtu = net->mtu + net->hard_header_len; @@ -1266,6 +1263,7 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->netdev_ops = &ax88179_netdev_ops; dev->net->ethtool_ops = &ax88179_ethtool_ops; dev->net->needed_headroom = 8; + dev->net->max_mtu = 4088; /* Initialize MII structure */ dev->mii.dev = dev->net; diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index d9ca05d3ac8e..a1f2f6f1e614 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -761,7 +761,6 @@ static const struct net_device_ops catc_netdev_ops = { .ndo_tx_timeout = catc_tx_timeout, .ndo_set_rx_mode = catc_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index ff2270ead2e6..eb52de8205f0 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -276,21 +276,11 @@ static int usbpn_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -ENOIOCTLCMD; } -static int usbpn_set_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU)) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops usbpn_ops = { .ndo_open = usbpn_open, .ndo_stop = usbpn_close, .ndo_start_xmit = usbpn_xmit, .ndo_do_ioctl = usbpn_ioctl, - .ndo_change_mtu = usbpn_set_mtu, }; static void usbpn_setup(struct net_device *dev) @@ -301,6 +291,8 @@ static void usbpn_setup(struct net_device *dev) dev->type = ARPHRD_PHONET; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = PHONET_MAX_MTU; + dev->min_mtu = PHONET_MIN_MTU; + dev->max_mtu = PHONET_MAX_MTU; dev->hard_header_len = 1; dev->dev_addr[0] = PN_MEDIA_USB; dev->addr_len = 1; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index c47ec0a04c8e..45e5e4332a28 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -687,6 +687,20 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* ThinkPad USB-C Dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* ThinkPad Thunderbolt 3 Dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3069, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM, @@ -694,6 +708,20 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo USB C to Ethernet Adapter (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x720c, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Lenovo USB-C Travel Hub (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7214, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 877c9516e781..7141817946f0 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -740,10 +740,6 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) { struct usbnet *dev = netdev_priv(net); - int maxmtu = cdc_ncm_max_dgram_size(dev) - cdc_ncm_eth_hlen(dev); - - if (new_mtu <= 0 || new_mtu > maxmtu) - return -EINVAL; net->mtu = new_mtu; cdc_ncm_set_dgram_size(dev, new_mtu + cdc_ncm_eth_hlen(dev)); @@ -909,6 +905,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* must handle MTU changes */ dev->net->netdev_ops = &cdc_ncm_netdev_ops; + dev->net->max_mtu = cdc_ncm_max_dgram_size(dev) - cdc_ncm_eth_hlen(dev); return 0; diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 66b34ddbe216..338aed5da14d 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -982,7 +982,6 @@ static const struct net_device_ops kaweth_netdev_ops = { .ndo_tx_timeout = kaweth_tx_timeout, .ndo_set_rx_mode = kaweth_set_rx_mode, .ndo_get_stats = kaweth_netdev_stats, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index db558b8b32fe..bcd9010c1f27 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -30,13 +30,17 @@ #include <linux/ipv6.h> #include <linux/mdio.h> #include <net/ip6_checksum.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <linux/irq.h> +#include <linux/irqchip/chained_irq.h> #include <linux/microchipphy.h> #include "lan78xx.h" #define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>" #define DRIVER_DESC "LAN78XX USB 3.0 Gigabit Ethernet Devices" #define DRIVER_NAME "lan78xx" -#define DRIVER_VERSION "1.0.4" +#define DRIVER_VERSION "1.0.5" #define TX_TIMEOUT_JIFFIES (5 * HZ) #define THROTTLE_JIFFIES (HZ / 8) @@ -89,6 +93,38 @@ /* statistic update interval (mSec) */ #define STAT_UPDATE_TIMER (1 * 1000) +/* defines interrupts from interrupt EP */ +#define MAX_INT_EP (32) +#define INT_EP_INTEP (31) +#define INT_EP_OTP_WR_DONE (28) +#define INT_EP_EEE_TX_LPI_START (26) +#define INT_EP_EEE_TX_LPI_STOP (25) +#define INT_EP_EEE_RX_LPI (24) +#define INT_EP_MAC_RESET_TIMEOUT (23) +#define INT_EP_RDFO (22) +#define INT_EP_TXE (21) +#define INT_EP_USB_STATUS (20) +#define INT_EP_TX_DIS (19) +#define INT_EP_RX_DIS (18) +#define INT_EP_PHY (17) +#define INT_EP_DP (16) +#define INT_EP_MAC_ERR (15) +#define INT_EP_TDFU (14) +#define INT_EP_TDFO (13) +#define INT_EP_UTX (12) +#define INT_EP_GPIO_11 (11) +#define INT_EP_GPIO_10 (10) +#define INT_EP_GPIO_9 (9) +#define INT_EP_GPIO_8 (8) +#define INT_EP_GPIO_7 (7) +#define INT_EP_GPIO_6 (6) +#define INT_EP_GPIO_5 (5) +#define INT_EP_GPIO_4 (4) +#define INT_EP_GPIO_3 (3) +#define INT_EP_GPIO_2 (2) +#define INT_EP_GPIO_1 (1) +#define INT_EP_GPIO_0 (0) + static const char lan78xx_gstrings[][ETH_GSTRING_LEN] = { "RX FCS Errors", "RX Alignment Errors", @@ -296,6 +332,15 @@ struct statstage { struct lan78xx_statstage64 curr_stat; }; +struct irq_domain_data { + struct irq_domain *irqdomain; + unsigned int phyirq; + struct irq_chip *irqchip; + irq_flow_handler_t irq_handler; + u32 irqenable; + struct mutex irq_lock; /* for irq bus access */ +}; + struct lan78xx_net { struct net_device *net; struct usb_device *udev; @@ -351,6 +396,8 @@ struct lan78xx_net { int delta; struct statstage stats; + + struct irq_domain_data domain_data; }; /* use ethtool to change the level for any given device */ @@ -1092,15 +1139,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; + struct ethtool_link_ksettings ecmd; int ladv, radv, ret; u32 buf; - /* clear PHY interrupt status */ - ret = phy_read(phydev, LAN88XX_INT_STS); - if (unlikely(ret < 0)) - return -EIO; - /* clear LAN78xx interrupt status */ ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_); if (unlikely(ret < 0)) @@ -1120,18 +1162,14 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; - phy_mac_interrupt(phydev, 0); - del_timer(&dev->stat_monitor); } else if (phydev->link && !dev->link_on) { dev->link_on = true; - phy_ethtool_gset(phydev, &ecmd); - - ret = phy_read(phydev, LAN88XX_INT_STS); + phy_ethtool_ksettings_get(phydev, &ecmd); if (dev->udev->speed == USB_SPEED_SUPER) { - if (ethtool_cmd_speed(&ecmd) == 1000) { + if (ecmd.base.speed == 1000) { /* disable U2 */ ret = lan78xx_read_reg(dev, USB_CFG1, &buf); buf &= ~USB_CFG1_DEV_U2_INIT_EN_; @@ -1159,10 +1197,10 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) netif_dbg(dev, link, dev->net, "speed: %u duplex: %d anadv: 0x%04x anlpa: 0x%04x", - ethtool_cmd_speed(&ecmd), ecmd.duplex, ladv, radv); + ecmd.base.speed, ecmd.base.duplex, ladv, radv); - ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv); - phy_mac_interrupt(phydev, 1); + ret = lan78xx_update_flowcontrol(dev, ecmd.base.duplex, ladv, + radv); if (!timer_pending(&dev->stat_monitor)) { dev->delta = 1; @@ -1201,7 +1239,10 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) if (intdata & INT_ENP_PHY_INT) { netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); - lan78xx_defer_kevent(dev, EVENT_LINK_RESET); + lan78xx_defer_kevent(dev, EVENT_LINK_RESET); + + if (dev->domain_data.phyirq > 0) + generic_handle_irq(dev->domain_data.phyirq); } else netdev_warn(dev->net, "unexpected interrupt: 0x%08x\n", intdata); @@ -1484,7 +1525,8 @@ static void lan78xx_set_mdix_status(struct net_device *net, __u8 mdix_ctrl) dev->mdix_ctrl = mdix_ctrl; } -static int lan78xx_get_settings(struct net_device *net, struct ethtool_cmd *cmd) +static int lan78xx_get_link_ksettings(struct net_device *net, + struct ethtool_link_ksettings *cmd) { struct lan78xx_net *dev = netdev_priv(net); struct phy_device *phydev = net->phydev; @@ -1495,20 +1537,20 @@ static int lan78xx_get_settings(struct net_device *net, struct ethtool_cmd *cmd) if (ret < 0) return ret; - ret = phy_ethtool_gset(phydev, cmd); + ret = phy_ethtool_ksettings_get(phydev, cmd); buf = lan78xx_get_mdix_status(net); buf &= LAN88XX_EXT_MODE_CTRL_MDIX_MASK_; if (buf == LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_) { - cmd->eth_tp_mdix = ETH_TP_MDI_AUTO; - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; } else if (buf == LAN88XX_EXT_MODE_CTRL_MDI_) { - cmd->eth_tp_mdix = ETH_TP_MDI; - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; } else if (buf == LAN88XX_EXT_MODE_CTRL_MDI_X_) { - cmd->eth_tp_mdix = ETH_TP_MDI_X; - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; } usb_autopm_put_interface(dev->intf); @@ -1516,7 +1558,8 @@ static int lan78xx_get_settings(struct net_device *net, struct ethtool_cmd *cmd) return ret; } -static int lan78xx_set_settings(struct net_device *net, struct ethtool_cmd *cmd) +static int lan78xx_set_link_ksettings(struct net_device *net, + const struct ethtool_link_ksettings *cmd) { struct lan78xx_net *dev = netdev_priv(net); struct phy_device *phydev = net->phydev; @@ -1527,14 +1570,13 @@ static int lan78xx_set_settings(struct net_device *net, struct ethtool_cmd *cmd) if (ret < 0) return ret; - if (dev->mdix_ctrl != cmd->eth_tp_mdix_ctrl) { - lan78xx_set_mdix_status(net, cmd->eth_tp_mdix_ctrl); - } + if (dev->mdix_ctrl != cmd->base.eth_tp_mdix_ctrl) + lan78xx_set_mdix_status(net, cmd->base.eth_tp_mdix_ctrl); /* change speed & duplex */ - ret = phy_ethtool_sset(phydev, cmd); + ret = phy_ethtool_ksettings_set(phydev, cmd); - if (!cmd->autoneg) { + if (!cmd->base.autoneg) { /* force link down */ temp = phy_read(phydev, MII_BMCR); phy_write(phydev, MII_BMCR, temp | BMCR_LOOPBACK); @@ -1552,9 +1594,9 @@ static void lan78xx_get_pause(struct net_device *net, { struct lan78xx_net *dev = netdev_priv(net); struct phy_device *phydev = net->phydev; - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; + struct ethtool_link_ksettings ecmd; - phy_ethtool_gset(phydev, &ecmd); + phy_ethtool_ksettings_get(phydev, &ecmd); pause->autoneg = dev->fc_autoneg; @@ -1570,12 +1612,12 @@ static int lan78xx_set_pause(struct net_device *net, { struct lan78xx_net *dev = netdev_priv(net); struct phy_device *phydev = net->phydev; - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; + struct ethtool_link_ksettings ecmd; int ret; - phy_ethtool_gset(phydev, &ecmd); + phy_ethtool_ksettings_get(phydev, &ecmd); - if (pause->autoneg && !ecmd.autoneg) { + if (pause->autoneg && !ecmd.base.autoneg) { ret = -EINVAL; goto exit; } @@ -1587,13 +1629,21 @@ static int lan78xx_set_pause(struct net_device *net, if (pause->tx_pause) dev->fc_request_control |= FLOW_CTRL_TX; - if (ecmd.autoneg) { + if (ecmd.base.autoneg) { u32 mii_adv; + u32 advertising; - ecmd.advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); + ethtool_convert_link_mode_to_legacy_u32( + &advertising, ecmd.link_modes.advertising); + + advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control); - ecmd.advertising |= mii_adv_to_ethtool_adv_t(mii_adv); - phy_ethtool_sset(phydev, &ecmd); + advertising |= mii_adv_to_ethtool_adv_t(mii_adv); + + ethtool_convert_legacy_u32_to_link_mode( + ecmd.link_modes.advertising, advertising); + + phy_ethtool_ksettings_set(phydev, &ecmd); } dev->fc_autoneg = pause->autoneg; @@ -1609,8 +1659,6 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .get_drvinfo = lan78xx_get_drvinfo, .get_msglevel = lan78xx_get_msglevel, .set_msglevel = lan78xx_set_msglevel, - .get_settings = lan78xx_get_settings, - .set_settings = lan78xx_set_settings, .get_eeprom_len = lan78xx_ethtool_get_eeprom_len, .get_eeprom = lan78xx_ethtool_get_eeprom, .set_eeprom = lan78xx_ethtool_set_eeprom, @@ -1623,6 +1671,8 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .set_eee = lan78xx_set_eee, .get_pauseparam = lan78xx_get_pause, .set_pauseparam = lan78xx_set_pause, + .get_link_ksettings = lan78xx_get_link_ksettings, + .set_link_ksettings = lan78xx_set_link_ksettings, }; static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) @@ -1834,6 +1884,127 @@ static void lan78xx_link_status_change(struct net_device *net) } } +static int irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct irq_domain_data *data = d->host_data; + + irq_set_chip_data(irq, data); + irq_set_chip_and_handler(irq, data->irqchip, data->irq_handler); + irq_set_noprobe(irq); + + return 0; +} + +static void irq_unmap(struct irq_domain *d, unsigned int irq) +{ + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); +} + +static const struct irq_domain_ops chip_domain_ops = { + .map = irq_map, + .unmap = irq_unmap, +}; + +static void lan78xx_irq_mask(struct irq_data *irqd) +{ + struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd); + + data->irqenable &= ~BIT(irqd_to_hwirq(irqd)); +} + +static void lan78xx_irq_unmask(struct irq_data *irqd) +{ + struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd); + + data->irqenable |= BIT(irqd_to_hwirq(irqd)); +} + +static void lan78xx_irq_bus_lock(struct irq_data *irqd) +{ + struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd); + + mutex_lock(&data->irq_lock); +} + +static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd) +{ + struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd); + struct lan78xx_net *dev = + container_of(data, struct lan78xx_net, domain_data); + u32 buf; + int ret; + + /* call register access here because irq_bus_lock & irq_bus_sync_unlock + * are only two callbacks executed in non-atomic contex. + */ + ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); + if (buf != data->irqenable) + ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); + + mutex_unlock(&data->irq_lock); +} + +static struct irq_chip lan78xx_irqchip = { + .name = "lan78xx-irqs", + .irq_mask = lan78xx_irq_mask, + .irq_unmask = lan78xx_irq_unmask, + .irq_bus_lock = lan78xx_irq_bus_lock, + .irq_bus_sync_unlock = lan78xx_irq_bus_sync_unlock, +}; + +static int lan78xx_setup_irq_domain(struct lan78xx_net *dev) +{ + struct device_node *of_node; + struct irq_domain *irqdomain; + unsigned int irqmap = 0; + u32 buf; + int ret = 0; + + of_node = dev->udev->dev.parent->of_node; + + mutex_init(&dev->domain_data.irq_lock); + + lan78xx_read_reg(dev, INT_EP_CTL, &buf); + dev->domain_data.irqenable = buf; + + dev->domain_data.irqchip = &lan78xx_irqchip; + dev->domain_data.irq_handler = handle_simple_irq; + + irqdomain = irq_domain_add_simple(of_node, MAX_INT_EP, 0, + &chip_domain_ops, &dev->domain_data); + if (irqdomain) { + /* create mapping for PHY interrupt */ + irqmap = irq_create_mapping(irqdomain, INT_EP_PHY); + if (!irqmap) { + irq_domain_remove(irqdomain); + + irqdomain = NULL; + ret = -EINVAL; + } + } else { + ret = -EINVAL; + } + + dev->domain_data.irqdomain = irqdomain; + dev->domain_data.phyirq = irqmap; + + return ret; +} + +static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) +{ + if (dev->domain_data.phyirq > 0) { + irq_dispose_mapping(dev->domain_data.phyirq); + + if (dev->domain_data.irqdomain) + irq_domain_remove(dev->domain_data.irqdomain); + } + dev->domain_data.phyirq = 0; + dev->domain_data.irqdomain = NULL; +} + static int lan78xx_phy_init(struct lan78xx_net *dev) { int ret; @@ -1846,15 +2017,12 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) return -EIO; } - /* Enable PHY interrupts. - * We handle our own interrupt - */ - ret = phy_read(phydev, LAN88XX_INT_STS); - ret = phy_write(phydev, LAN88XX_INT_MASK, - LAN88XX_INT_MASK_MDINTPIN_EN_ | - LAN88XX_INT_MASK_LINK_CHANGE_); - - phydev->irq = PHY_IGNORE_INTERRUPT; + /* if phyirq is not set, use polling mode in phylib */ + if (dev->domain_data.phyirq > 0) + phydev->irq = dev->domain_data.phyirq; + else + phydev->irq = 0; + netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq); ret = phy_connect_direct(dev->net, phydev, lan78xx_link_status_change, @@ -1970,11 +2138,6 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) int old_rx_urb_size = dev->rx_urb_size; int ret; - if (new_mtu > MAX_SINGLE_PACKET_SIZE) - return -EINVAL; - - if (new_mtu <= 0) - return -EINVAL; /* no second zero-length packet read wanted after mtu-sized packets */ if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; @@ -2250,11 +2413,6 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_; ret = lan78xx_write_reg(dev, MAC_CR, buf); - /* enable PHY interrupts */ - ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); - buf |= INT_ENP_PHY_INT; - ret = lan78xx_write_reg(dev, INT_EP_CTL, buf); - ret = lan78xx_read_reg(dev, MAC_TX, &buf); buf |= MAC_TX_TXEN_; ret = lan78xx_write_reg(dev, MAC_TX, buf); @@ -2663,6 +2821,14 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) dev->net->hw_features = dev->net->features; + ret = lan78xx_setup_irq_domain(dev); + if (ret < 0) { + netdev_warn(dev->net, + "lan78xx_setup_irq_domain() failed : %d", ret); + kfree(pdata); + return ret; + } + /* Init all registers */ ret = lan78xx_reset(dev); @@ -2679,6 +2845,8 @@ static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf) { struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); + lan78xx_remove_irq_domain(dev); + lan78xx_remove_mdio(dev); if (pdata) { @@ -3378,6 +3546,9 @@ static int lan78xx_probe(struct usb_interface *intf, if (netdev->mtu > (dev->hard_mtu - netdev->hard_header_len)) netdev->mtu = dev->hard_mtu - netdev->hard_header_len; + /* MTU range: 68 - 9000 */ + netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; + dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; dev->ep_intr = (intf->cur_altsetting)->endpoint + 2; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 1434e5dd5f9c..399f7ee57aea 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1273,7 +1273,6 @@ static const struct net_device_ops pegasus_netdev_ops = { .ndo_set_rx_mode = pegasus_set_multicast, .ndo_get_stats = pegasus_netdev_stats, .ndo_tx_timeout = pegasus_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index efb84f092492..7dc61228c55b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4116,14 +4116,12 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) switch (tp->version) { case RTL_VER_01: case RTL_VER_02: - return eth_change_mtu(dev, new_mtu); + dev->mtu = new_mtu; + return 0; default: break; } - if (new_mtu < 68 || new_mtu > RTL8153_MAX_MTU) - return -EINVAL; - ret = usb_autopm_get_interface(tp->intf); if (ret < 0) return ret; @@ -4313,6 +4311,18 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); + /* MTU range: 68 - 1500 or 9194 */ + netdev->min_mtu = ETH_MIN_MTU; + switch (tp->version) { + case RTL_VER_01: + case RTL_VER_02: + netdev->max_mtu = ETH_DATA_LEN; + break; + default: + netdev->max_mtu = RTL8153_MAX_MTU; + break; + } + tp->mii.dev = netdev; tp->mii.mdio_read = read_mii_word; tp->mii.mdio_write = write_mii_word; @@ -4413,8 +4423,12 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, - {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 7c72bfac89d0..93a1bda1c1e5 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -847,7 +847,6 @@ static const struct net_device_ops rtl8150_netdev_ops = { .ndo_set_rx_mode = rtl8150_set_multicast, .ndo_set_mac_address = rtl8150_set_mac_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index a251588762ec..12071f1582df 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -165,7 +165,6 @@ struct lsi_umts { /* Forward definitions */ static void sierra_sync_timer(unsigned long syncdata); -static int sierra_net_change_mtu(struct net_device *net, int new_mtu); /* Our own net device operations structure */ static const struct net_device_ops sierra_net_device_ops = { @@ -173,7 +172,7 @@ static const struct net_device_ops sierra_net_device_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_change_mtu = sierra_net_change_mtu, + .ndo_change_mtu = usbnet_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -622,15 +621,6 @@ static const struct ethtool_ops sierra_net_ethtool_ops = { .nway_reset = usbnet_nway_reset, }; -/* MTU can not be more than 1500 bytes, enforce it. */ -static int sierra_net_change_mtu(struct net_device *net, int new_mtu) -{ - if (new_mtu > SIERRA_NET_MAX_SUPPORTED_MTU) - return -EINVAL; - - return usbnet_change_mtu(net, new_mtu); -} - static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap) { int result = 0; @@ -720,6 +710,7 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->hard_header_len += SIERRA_NET_HIP_EXT_HDR_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + dev->net->max_mtu = SIERRA_NET_MAX_SUPPORTED_MTU; /* Set up the netdev */ dev->net->flags |= IFF_NOARP; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 9af9799935db..0b17b40d7a4f 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -925,9 +925,6 @@ static int smsc75xx_change_mtu(struct net_device *netdev, int new_mtu) struct usbnet *dev = netdev_priv(netdev); int ret; - if (new_mtu > MAX_SINGLE_PACKET_SIZE) - return -EINVAL; - ret = smsc75xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN); if (ret < 0) { netdev_warn(dev->net, "Failed to set mac rx frame length\n"); @@ -1448,6 +1445,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC75XX_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE; return 0; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d5071e364d40..3de65ea6531a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -384,8 +384,6 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu) int old_hard_mtu = dev->hard_mtu; int old_rx_urb_size = dev->rx_urb_size; - if (new_mtu <= 0) - return -EINVAL; // no second zero-length packet read wanted after mtu-sized packets if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; @@ -1669,6 +1667,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) * bind() should set rx_urb_size in that case. */ dev->hard_mtu = net->mtu + net->hard_header_len; + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; net->netdev_ops = &usbnet_netdev_ops; net->watchdog_timeo = TX_TIMEOUT_JIFFIES; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index fbc853e64531..0520952aa096 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -23,9 +23,6 @@ #define DRV_NAME "veth" #define DRV_VERSION "1.0" -#define MIN_MTU 68 /* Min L3 MTU */ -#define MAX_MTU 65535 /* Max L3 MTU (arbitrary) */ - struct pcpu_vstats { u64 packets; u64 bytes; @@ -216,17 +213,9 @@ static int veth_close(struct net_device *dev) return 0; } -static int is_valid_veth_mtu(int new_mtu) +static int is_valid_veth_mtu(int mtu) { - return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU; -} - -static int veth_change_mtu(struct net_device *dev, int new_mtu) -{ - if (!is_valid_veth_mtu(new_mtu)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; + return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; } static int veth_dev_init(struct net_device *dev) @@ -300,7 +289,6 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_open = veth_open, .ndo_stop = veth_close, .ndo_start_xmit = veth_xmit, - .ndo_change_mtu = veth_change_mtu, .ndo_get_stats64 = veth_get_stats64, .ndo_set_rx_mode = veth_set_multicast_list, .ndo_set_mac_address = eth_mac_addr, @@ -337,6 +325,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX); dev->destructor = veth_dev_free; + dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; dev->hw_enc_features = VETH_FEATURES; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fd8b1e62301f..ca5239aea4d9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1419,17 +1419,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .set_settings = virtnet_set_settings, }; -#define MIN_MTU 68 -#define MAX_MTU 65535 - -static int virtnet_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < MIN_MTU || new_mtu > MAX_MTU) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -1437,7 +1426,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, - .ndo_change_mtu = virtnet_change_mtu, .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, @@ -1748,6 +1736,9 @@ static bool virtnet_validate_features(struct virtio_device *vdev) return true; } +#define MIN_MTU ETH_MIN_MTU +#define MAX_MTU ETH_MAX_MTU + static int virtnet_probe(struct virtio_device *vdev) { int i, err; @@ -1821,6 +1812,10 @@ static int virtnet_probe(struct virtio_device *vdev) dev->vlan_features = dev->features; + /* MTU range: 68 - 65535 */ + dev->min_mtu = MIN_MTU; + dev->max_mtu = MAX_MTU; + /* Configuration may specify what MAC to use. Otherwise random. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) virtio_cread_bytes(vdev, @@ -1875,8 +1870,12 @@ static int virtnet_probe(struct virtio_device *vdev) mtu = virtio_cread16(vdev, offsetof(struct virtio_net_config, mtu)); - if (virtnet_change_mtu(dev, mtu)) + if (mtu < dev->min_mtu) { __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); + } else { + dev->mtu = mtu; + dev->max_mtu = mtu; + } } if (vi->any_header_sg) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ef83ae3b0a44..e34b1297c96a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2972,9 +2972,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU) - return -EINVAL; - netdev->mtu = new_mtu; /* @@ -3431,6 +3428,10 @@ vmxnet3_probe_device(struct pci_dev *pdev, vmxnet3_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; + /* MTU range: 60 - 9000 */ + netdev->min_mtu = VMXNET3_MIN_MTU; + netdev->max_mtu = VMXNET3_MAX_MTU; + INIT_WORK(&adapter->work, vmxnet3_reset_work); set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 820de6a9ddde..3bca24651dc0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -272,11 +272,6 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, if (IS_ERR(rt)) goto err; - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { - ip_rt_put(rt); - goto err; - } - skb_dst_drop(skb); /* if dst.dev is loopback or the VRF device again this is locally @@ -611,6 +606,10 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, struct dst_entry *dst = NULL; struct rtable *rth; + /* don't divert multicast */ + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + return skb; + rcu_read_lock(); rth = rcu_dereference(vrf->rth); @@ -999,6 +998,9 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + goto out; + /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb */ @@ -1162,8 +1164,19 @@ static int vrf_add_fib_rules(const struct net_device *dev) if (err < 0) goto ipv6_err; +#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) + err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true); + if (err < 0) + goto ipmr_err; +#endif + return 0; +#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) +ipmr_err: + vrf_fib_rule(dev, AF_INET6, false); +#endif + ipv6_err: vrf_fib_rule(dev, AF_INET, false); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 24532cdebb00..5264c1a49d86 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2390,43 +2390,31 @@ static void vxlan_set_multicast_list(struct net_device *dev) { } -static int __vxlan_change_mtu(struct net_device *dev, - struct net_device *lowerdev, - struct vxlan_rdst *dst, int new_mtu, bool strict) +static int vxlan_change_mtu(struct net_device *dev, int new_mtu) { - int max_mtu = IP_MAX_MTU; - - if (lowerdev) - max_mtu = lowerdev->mtu; + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + bool use_ipv6 = false; if (dst->remote_ip.sa.sa_family == AF_INET6) - max_mtu -= VXLAN6_HEADROOM; - else - max_mtu -= VXLAN_HEADROOM; - - if (new_mtu < 68) - return -EINVAL; + use_ipv6 = true; - if (new_mtu > max_mtu) { - if (strict) + /* This check is different than dev->max_mtu, because it looks at + * the lowerdev->mtu, rather than the static dev->max_mtu + */ + if (lowerdev) { + int max_mtu = lowerdev->mtu - + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + if (new_mtu > max_mtu) return -EINVAL; - - new_mtu = max_mtu; } dev->mtu = new_mtu; return 0; } -static int vxlan_change_mtu(struct net_device *dev, int new_mtu) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; - struct net_device *lowerdev = __dev_get_by_index(vxlan->net, - dst->remote_ifindex); - return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); -} - static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2817,6 +2805,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, vxlan_ether_setup(dev); } + /* MTU range: 68 - 65535 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; + vxlan->net = src_net; dst->remote_vni = conf->vni; @@ -2860,7 +2852,8 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, #endif if (!conf->mtu) - dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + dev->mtu = lowerdev->mtu - + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); needed_headroom = lowerdev->hard_header_len; } else if (vxlan_addr_multicast(&dst->remote_ip)) { @@ -2869,9 +2862,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, } if (conf->mtu) { - err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); - if (err) - return err; + int max_mtu = ETH_MAX_MTU; + + if (lowerdev) + max_mtu = lowerdev->mtu; + + max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + + if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu) + return -EINVAL; + + dev->mtu = conf->mtu; + + if (conf->mtu > max_mtu) + dev->mtu = max_mtu; } if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c index 09a50751763b..2371e078afbb 100644 --- a/drivers/net/wan/c101.c +++ b/drivers/net/wan/c101.c @@ -302,7 +302,6 @@ static void c101_destroy_card(card_t *card) static const struct net_device_ops c101_ops = { .ndo_open = c101_open, .ndo_stop = c101_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = c101_ioctl, }; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index b87fe0a01c69..087eb266601f 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -432,7 +432,6 @@ module_exit(cosa_exit); static const struct net_device_ops cosa_ops = { .ndo_open = cosa_net_open, .ndo_stop = cosa_net_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = cosa_net_ioctl, .ndo_tx_timeout = cosa_net_timeout, diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 629225980463..7351e5440ed7 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -887,7 +887,6 @@ static inline int dscc4_set_quartz(struct dscc4_dev_priv *dpriv, int hz) static const struct net_device_ops dscc4_ops = { .ndo_open = dscc4_open, .ndo_stop = dscc4_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = dscc4_ioctl, .ndo_tx_timeout = dscc4_tx_timeout, diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 3c9cbf908ec7..03696d35ee9c 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2394,7 +2394,6 @@ fst_init_card(struct fst_card_info *card) static const struct net_device_ops fst_ops = { .ndo_open = fst_open, .ndo_stop = fst_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = fst_ioctl, .ndo_tx_timeout = fst_tx_timeout, diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 65647533b401..e38ce4da3efb 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -992,7 +992,6 @@ static const struct dev_pm_ops uhdlc_pm_ops = { static const struct net_device_ops uhdlc_ops = { .ndo_open = uhdlc_open, .ndo_stop = uhdlc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = uhdlc_ioctl, }; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 9bd4aa8083ce..7221a53b8b14 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -46,14 +46,6 @@ static const char* version = "HDLC support module revision 1.22"; static struct hdlc_proto *first_proto; -int hdlc_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { @@ -237,6 +229,8 @@ static void hdlc_setup_dev(struct net_device *dev) dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->priv_flags = IFF_WAN_HDLC; dev->mtu = HDLC_MAX_MTU; + dev->min_mtu = 68; + dev->max_mtu = HDLC_MAX_MTU; dev->type = ARPHRD_RAWHDLC; dev->hard_header_len = 16; dev->addr_len = 0; @@ -353,7 +347,6 @@ MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>"); MODULE_DESCRIPTION("HDLC support module"); MODULE_LICENSE("GPL v2"); -EXPORT_SYMBOL(hdlc_change_mtu); EXPORT_SYMBOL(hdlc_start_xmit); EXPORT_SYMBOL(hdlc_open); EXPORT_SYMBOL(hdlc_close); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index b6e0cfb095d3..eb915281197e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1053,7 +1053,6 @@ static void pvc_setup(struct net_device *dev) static const struct net_device_ops pvc_ops = { .ndo_open = pvc_open, .ndo_stop = pvc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = pvc_xmit, .ndo_do_ioctl = pvc_ioctl, }; @@ -1096,6 +1095,8 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) } dev->netdev_ops = &pvc_ops; dev->mtu = HDLC_MAX_MTU; + dev->min_mtu = 68; + dev->max_mtu = HDLC_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE; dev->ml_priv = pvc; diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 3d741663fd67..dd6bb3364ad2 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -180,7 +180,6 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops hostess_ops = { .ndo_open = hostess_open, .ndo_stop = hostess_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hostess_ioctl, }; diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index e7bbdb7af53a..6a505c26a3e7 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -1321,7 +1321,6 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static const struct net_device_ops hss_hdlc_ops = { .ndo_open = hss_hdlc_open, .ndo_stop = hss_hdlc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hss_hdlc_ioctl, }; diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 299140c04556..001b7796740d 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -808,7 +808,6 @@ static int lmc_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops lmc_ops = { .ndo_open = lmc_open, .ndo_stop = lmc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = lmc_ioctl, .ndo_tx_timeout = lmc_driver_timeout, diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c index 315bf09d6a20..c8f4517db3a0 100644 --- a/drivers/net/wan/n2.c +++ b/drivers/net/wan/n2.c @@ -330,7 +330,6 @@ static void n2_destroy_card(card_t *card) static const struct net_device_ops n2_ops = { .ndo_open = n2_open, .ndo_stop = n2_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = n2_ioctl, }; diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c index db363856e0b5..e1dd1ec18d64 100644 --- a/drivers/net/wan/pc300too.c +++ b/drivers/net/wan/pc300too.c @@ -291,7 +291,6 @@ static void pc300_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops pc300_ops = { .ndo_open = pc300_open, .ndo_stop = pc300_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = pc300_ioctl, }; diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c index e8455621390e..4e437c599e9a 100644 --- a/drivers/net/wan/pci200syn.c +++ b/drivers/net/wan/pci200syn.c @@ -270,7 +270,6 @@ static void pci200_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops pci200_ops = { .ndo_open = pci200_open, .ndo_stop = pci200_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = pci200_ioctl, }; diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 3a421ca8a4d0..3f83be98d469 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -211,7 +211,6 @@ static const struct net_device_ops sbni_netdev_ops = { .ndo_start_xmit = sbni_start_xmit, .ndo_set_rx_mode = set_multicast_list, .ndo_do_ioctl = sbni_ioctl, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 27860b4f5908..fbb5aa2c4d8f 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -174,7 +174,6 @@ static int sealevel_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops sealevel_ops = { .ndo_open = sealevel_open, .ndo_stop = sealevel_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = sealevel_ioctl, }; diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index a20d688d2595..0c7317520ed3 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -551,7 +551,6 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops wanxl_ops = { .ndo_open = wanxl_open, .ndo_stop = wanxl_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = wanxl_ioctl, .ndo_get_stats = wanxl_get_stats, diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 1bc5e93d2a34..878b05d06fc7 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -124,9 +124,6 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) unsigned char *xbuff, *rbuff; int len; - if (newmtu > 65534) - return -EINVAL; - len = 2 * newmtu; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); @@ -751,6 +748,8 @@ static void x25_asy_setup(struct net_device *dev) */ dev->mtu = SL_MTU; + dev->min_mtu = 0; + dev->max_mtu = 65534; dev->netdev_ops = &x25_asy_netdev_ops; dev->watchdog_timeo = HZ*20; dev->hard_header_len = 0; diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c index bb74f4b9a02f..7f64e74d746b 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/net/wimax/i2400m/netdev.c @@ -395,25 +395,6 @@ drop: static -int i2400m_change_mtu(struct net_device *net_dev, int new_mtu) -{ - int result; - struct i2400m *i2400m = net_dev_to_i2400m(net_dev); - struct device *dev = i2400m_dev(i2400m); - - if (new_mtu >= I2400M_MAX_MTU) { - dev_err(dev, "Cannot change MTU to %d (max is %d)\n", - new_mtu, I2400M_MAX_MTU); - result = -EINVAL; - } else { - net_dev->mtu = new_mtu; - result = 0; - } - return result; -} - - -static void i2400m_tx_timeout(struct net_device *net_dev) { /* @@ -590,7 +571,6 @@ static const struct net_device_ops i2400m_netdev_ops = { .ndo_stop = i2400m_stop, .ndo_start_xmit = i2400m_hard_start_xmit, .ndo_tx_timeout = i2400m_tx_timeout, - .ndo_change_mtu = i2400m_change_mtu, }; static void i2400m_get_drvinfo(struct net_device *net_dev, @@ -621,6 +601,8 @@ void i2400m_netdev_setup(struct net_device *net_dev) d_fnstart(3, NULL, "(net_dev %p)\n", net_dev); ether_setup(net_dev); net_dev->mtu = I2400M_MAX_MTU; + net_dev->min_mtu = 0; + net_dev->max_mtu = I2400M_MAX_MTU; net_dev->tx_queue_len = I2400M_TX_QLEN; net_dev->features = NETIF_F_VLAN_CHALLENGED diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 8c8edaf1bba6..8f5a3f4a43f2 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -17,6 +17,19 @@ menuconfig WLAN if WLAN +config WIRELESS_WDS + bool "mac80211-based legacy WDS support" if EXPERT + help + This option enables the deprecated WDS support, the newer + mac80211-based 4-addr AP/client support supersedes it with + a much better feature set (HT, VHT, ...) + + We plan to remove this option and code, so if you find + that you have to enable it, please let us know on the + linux-wireless@vger.kernel.org mailing list, so we can + help you migrate to 4-addr AP/client (or, if it's really + necessary, give up on our plan of removing it). + source "drivers/net/wireless/admtek/Kconfig" source "drivers/net/wireless/ath/Kconfig" source "drivers/net/wireless/atmel/Kconfig" diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 76297d69f1ed..e322b6df0ebc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -7881,6 +7881,7 @@ int ath10k_mac_register(struct ath10k *ar) ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF); ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA); ieee80211_hw_set(ar->hw, QUEUE_CONTROL); + ieee80211_hw_set(ar->hw, SUPPORTS_TX_FRAG); if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ieee80211_hw_set(ar->hw, SW_CRYPTO_CONTROL); diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index cfa3fe82ade3..368d9b313823 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -734,9 +734,11 @@ static const struct ieee80211_iface_limit if_limits[] = { BIT(NL80211_IFTYPE_P2P_GO) }, }; +#ifdef CONFIG_WIRELESS_WDS static const struct ieee80211_iface_limit wds_limits[] = { { .max = 2048, .types = BIT(NL80211_IFTYPE_WDS) }, }; +#endif #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT @@ -774,6 +776,7 @@ static const struct ieee80211_iface_combination if_comb[] = { BIT(NL80211_CHAN_WIDTH_40), #endif }, +#ifdef CONFIG_WIRELESS_WDS { .limits = wds_limits, .n_limits = ARRAY_SIZE(wds_limits), @@ -781,6 +784,7 @@ static const struct ieee80211_iface_combination if_comb[] = { .num_different_channels = 1, .beacon_int_infra_match = true, }, +#endif }; #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT @@ -851,7 +855,9 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_MESH_POINT) | +#ifdef CONFIG_WIRELESS_WDS BIT(NL80211_IFTYPE_WDS) | +#endif BIT(NL80211_IFTYPE_OCB); if (ath9k_is_chanctx_enabled()) diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 61de5e9f8ef0..d18372cdc8ca 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -41,21 +41,6 @@ static int wil_stop(struct net_device *ndev) return wil_down(wil); } -static int wil_change_mtu(struct net_device *ndev, int new_mtu) -{ - struct wil6210_priv *wil = ndev_to_wil(ndev); - - if (new_mtu < 68 || new_mtu > mtu_max) { - wil_err(wil, "invalid MTU %d\n", new_mtu); - return -EINVAL; - } - - wil_dbg_misc(wil, "change MTU %d -> %d\n", ndev->mtu, new_mtu); - ndev->mtu = new_mtu; - - return 0; -} - static int wil_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) { struct wil6210_priv *wil = ndev_to_wil(ndev); @@ -69,7 +54,6 @@ static const struct net_device_ops wil_netdev_ops = { .ndo_start_xmit = wil_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = wil_change_mtu, .ndo_do_ioctl = wil_do_ioctl, }; @@ -126,6 +110,7 @@ static int wil6210_netdev_poll_tx(struct napi_struct *napi, int budget) static void wil_dev_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = mtu_max; dev->tx_queue_len = WIL_TX_Q_LEN_DEFAULT; } diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index bf2e9a083c0c..eb92d5ab7a27 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -1295,14 +1295,6 @@ static struct iw_statistics *atmel_get_wireless_stats(struct net_device *dev) return &priv->wstats; } -static int atmel_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > 2312)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static int atmel_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@ -1506,7 +1498,6 @@ static const struct file_operations atmel_proc_fops = { static const struct net_device_ops atmel_netdev_ops = { .ndo_open = atmel_open, .ndo_stop = atmel_close, - .ndo_change_mtu = atmel_change_mtu, .ndo_set_mac_address = atmel_set_mac_address, .ndo_start_xmit = start_tx, .ndo_do_ioctl = atmel_ioctl, @@ -1600,6 +1591,10 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port, dev->irq = irq; dev->base_addr = port; + /* MTU range: 68 - 2312 */ + dev->min_mtu = 68; + dev->max_mtu = MAX_WIRELESS_BODY - ETH_FCS_LEN; + SET_NETDEV_DEV(dev, sys_dev); if ((rc = request_irq(dev->irq, service_interrupt, IRQF_SHARED, dev->name, dev))) { diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 6e5d9095b195..52f3541ecbcf 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -5591,7 +5591,9 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev) BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_STATION) | +#ifdef CONFIG_WIRELESS_WDS BIT(NL80211_IFTYPE_WDS) | +#endif BIT(NL80211_IFTYPE_ADHOC); hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 83770d2ea057..e97ab2b91663 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -3838,7 +3838,9 @@ static int b43legacy_wireless_init(struct ssb_device *dev) hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | +#ifdef CONFIG_WIRELESS_WDS BIT(NL80211_IFTYPE_WDS) | +#endif BIT(NL80211_IFTYPE_ADHOC); hw->queues = 1; /* FIXME: hardware has more queues */ hw->max_rates = 2; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 78d9966a3957..cf267f9da753 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -414,23 +414,24 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg, struct brcmf_cfg80211_vif *vif, enum nl80211_iftype new_type) { - int iftype_num[NUM_NL80211_IFTYPES]; struct brcmf_cfg80211_vif *pos; bool check_combos = false; int ret = 0; + struct iface_combination_params params = { + .num_different_channels = 1, + }; - memset(&iftype_num[0], 0, sizeof(iftype_num)); list_for_each_entry(pos, &cfg->vif_list, list) if (pos == vif) { - iftype_num[new_type]++; + params.iftype_num[new_type]++; } else { /* concurrent interfaces so need check combinations */ check_combos = true; - iftype_num[pos->wdev.iftype]++; + params.iftype_num[pos->wdev.iftype]++; } if (check_combos) - ret = cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num); + ret = cfg80211_check_combinations(cfg->wiphy, ¶ms); return ret; } @@ -438,15 +439,16 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg, static int brcmf_vif_add_validate(struct brcmf_cfg80211_info *cfg, enum nl80211_iftype new_type) { - int iftype_num[NUM_NL80211_IFTYPES]; struct brcmf_cfg80211_vif *pos; + struct iface_combination_params params = { + .num_different_channels = 1, + }; - memset(&iftype_num[0], 0, sizeof(iftype_num)); list_for_each_entry(pos, &cfg->vif_list, list) - iftype_num[pos->wdev.iftype]++; + params.iftype_num[pos->wdev.iftype]++; - iftype_num[new_type]++; - return cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num); + params.iftype_num[new_type]++; + return cfg80211_check_combinations(cfg->wiphy, ¶ms); } static void convert_key_from_CPU(struct brcmf_wsec_key *key, diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 69b826d229c5..4b040451a9b8 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -2329,14 +2329,6 @@ static int airo_set_mac_address(struct net_device *dev, void *p) return 0; } -static int airo_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > 2400)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static LIST_HEAD(airo_devices); static void add_airo_dev(struct airo_info *ai) @@ -2656,7 +2648,6 @@ static const struct net_device_ops airo11_netdev_ops = { .ndo_get_stats = airo_get_stats, .ndo_set_mac_address = airo_set_mac_address, .ndo_do_ioctl = airo_ioctl, - .ndo_change_mtu = airo_change_mtu, }; static void wifi_setup(struct net_device *dev) @@ -2668,6 +2659,8 @@ static void wifi_setup(struct net_device *dev) dev->type = ARPHRD_IEEE80211; dev->hard_header_len = ETH_HLEN; dev->mtu = AIRO_DEF_MTU; + dev->min_mtu = 68; + dev->max_mtu = MIC_MSGLEN_MAX; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 100; @@ -2754,7 +2747,6 @@ static const struct net_device_ops airo_netdev_ops = { .ndo_set_rx_mode = airo_set_multicast_list, .ndo_set_mac_address = airo_set_mac_address, .ndo_do_ioctl = airo_ioctl, - .ndo_change_mtu = airo_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -2766,7 +2758,6 @@ static const struct net_device_ops mpi_netdev_ops = { .ndo_set_rx_mode = airo_set_multicast_list, .ndo_set_mac_address = airo_set_mac_address, .ndo_do_ioctl = airo_ioctl, - .ndo_change_mtu = airo_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -2822,6 +2813,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port, dev->irq = irq; dev->base_addr = port; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->max_mtu = MIC_MSGLEN_MAX; SET_NETDEV_DEV(dev, dmdev); diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index bfa542c8d6f1..64176090b196 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -6035,7 +6035,6 @@ static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, .ndo_stop = ipw2100_close, .ndo_start_xmit = libipw_xmit, - .ndo_change_mtu = libipw_change_mtu, .ndo_tx_timeout = ipw2100_tx_timeout, .ndo_set_mac_address = ipw2100_set_address, .ndo_validate_addr = eth_validate_addr, @@ -6071,6 +6070,8 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, dev->wireless_data = &priv->wireless_data; dev->watchdog_timeo = 3 * HZ; dev->irq = 0; + dev->min_mtu = 68; + dev->max_mtu = LIBIPW_DATA_LEN; /* NOTE: We don't use the wireless_handlers hook * in dev as the system will start throwing WX requests diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index bfd68612a535..ef9af8a29cad 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -11561,7 +11561,6 @@ static const struct net_device_ops ipw_prom_netdev_ops = { .ndo_open = ipw_prom_open, .ndo_stop = ipw_prom_stop, .ndo_start_xmit = ipw_prom_hard_start_xmit, - .ndo_change_mtu = libipw_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -11587,6 +11586,9 @@ static int ipw_prom_alloc(struct ipw_priv *priv) priv->prom_net_dev->type = ARPHRD_IEEE80211_RADIOTAP; priv->prom_net_dev->netdev_ops = &ipw_prom_netdev_ops; + priv->prom_net_dev->min_mtu = 68; + priv->prom_net_dev->max_mtu = LIBIPW_DATA_LEN; + priv->prom_priv->ieee->iw_mode = IW_MODE_MONITOR; SET_NETDEV_DEV(priv->prom_net_dev, &priv->pci_dev->dev); @@ -11619,7 +11621,6 @@ static const struct net_device_ops ipw_netdev_ops = { .ndo_set_rx_mode = ipw_net_set_multicast_list, .ndo_set_mac_address = ipw_net_set_mac_address, .ndo_start_xmit = libipw_xmit, - .ndo_change_mtu = libipw_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -11729,6 +11730,9 @@ static int ipw_pci_probe(struct pci_dev *pdev, net_dev->wireless_handlers = &ipw_wx_handler_def; net_dev->ethtool_ops = &ipw_ethtool_ops; + net_dev->min_mtu = 68; + net_dev->max_mtu = LIBIPW_DATA_LEN; + err = sysfs_create_group(&pdev->dev.kobj, &ipw_attribute_group); if (err) { IPW_ERROR("failed to create sysfs device attributes\n"); diff --git a/drivers/net/wireless/intel/ipw2x00/libipw.h b/drivers/net/wireless/intel/ipw2x00/libipw.h index b0571618c2ed..b51355134e04 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw.h +++ b/drivers/net/wireless/intel/ipw2x00/libipw.h @@ -948,7 +948,6 @@ static inline int libipw_is_cck_rate(u8 rate) /* libipw.c */ void free_libipw(struct net_device *dev, int monitor); struct net_device *alloc_libipw(int sizeof_priv, int monitor); -int libipw_change_mtu(struct net_device *dev, int new_mtu); void libipw_networks_age(struct libipw_device *ieee, unsigned long age_secs); diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_module.c b/drivers/net/wireless/intel/ipw2x00/libipw_module.c index 60f28740f6af..2332075565f2 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_module.c @@ -118,15 +118,6 @@ static void libipw_networks_initialize(struct libipw_device *ieee) &ieee->network_free_list); } -int libipw_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > LIBIPW_DATA_LEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(libipw_change_mtu); - struct net_device *alloc_libipw(int sizeof_priv, int monitor) { struct libipw_device *ieee; diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 80d4228ba754..1a16b8cb366e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -765,16 +765,6 @@ static void hostap_set_multicast_list(struct net_device *dev) } -static int prism2_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < PRISM2_MIN_MTU || new_mtu > PRISM2_MAX_MTU) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - - static void prism2_tx_timeout(struct net_device *dev) { struct hostap_interface *iface; @@ -813,7 +803,6 @@ static const struct net_device_ops hostap_netdev_ops = { .ndo_do_ioctl = hostap_ioctl, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, - .ndo_change_mtu = prism2_change_mtu, .ndo_tx_timeout = prism2_tx_timeout, .ndo_validate_addr = eth_validate_addr, }; @@ -826,7 +815,6 @@ static const struct net_device_ops hostap_mgmt_netdev_ops = { .ndo_do_ioctl = hostap_ioctl, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, - .ndo_change_mtu = prism2_change_mtu, .ndo_tx_timeout = prism2_tx_timeout, .ndo_validate_addr = eth_validate_addr, }; @@ -839,7 +827,6 @@ static const struct net_device_ops hostap_master_ops = { .ndo_do_ioctl = hostap_ioctl, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, - .ndo_change_mtu = prism2_change_mtu, .ndo_tx_timeout = prism2_tx_timeout, .ndo_validate_addr = eth_validate_addr, }; @@ -851,6 +838,8 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local, iface = netdev_priv(dev); ether_setup(dev); + dev->min_mtu = PRISM2_MIN_MTU; + dev->max_mtu = PRISM2_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; /* kernel callbacks */ diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c index 7afe2004e930..9d96b7c928f7 100644 --- a/drivers/net/wireless/intersil/orinoco/main.c +++ b/drivers/net/wireless/intersil/orinoco/main.c @@ -322,9 +322,6 @@ int orinoco_change_mtu(struct net_device *dev, int new_mtu) { struct orinoco_private *priv = ndev_priv(dev); - if ((new_mtu < ORINOCO_MIN_MTU) || (new_mtu > ORINOCO_MAX_MTU)) - return -EINVAL; - /* MTU + encapsulation + header length */ if ((new_mtu + ENCAPS_OVERHEAD + sizeof(struct ieee80211_hdr)) > (priv->nicbuf_size - ETH_HLEN)) @@ -2288,6 +2285,9 @@ int orinoco_if_add(struct orinoco_private *priv, dev->base_addr = base_addr; dev->irq = irq; + dev->min_mtu = ORINOCO_MIN_MTU; + dev->max_mtu = ORINOCO_MAX_MTU; + SET_NETDEV_DEV(dev, priv->dev); ret = register_netdev(dev); if (ret) diff --git a/drivers/net/wireless/intersil/prism54/islpci_dev.c b/drivers/net/wireless/intersil/prism54/islpci_dev.c index 84a42012aeae..325176d4d796 100644 --- a/drivers/net/wireless/intersil/prism54/islpci_dev.c +++ b/drivers/net/wireless/intersil/prism54/islpci_dev.c @@ -808,7 +808,6 @@ static const struct net_device_ops islpci_netdev_ops = { .ndo_start_xmit = islpci_eth_transmit, .ndo_tx_timeout = islpci_eth_tx_timeout, .ndo_set_mac_address = prism54_set_mac_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 431f13b4faf6..8f366cc097e6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -587,15 +587,8 @@ struct hwsim_radiotap_ack_hdr { __le16 rt_chbitmask; } __packed; -/* MAC80211_HWSIM netlinf family */ -static struct genl_family hwsim_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "MAC80211_HWSIM", - .version = 1, - .maxattr = HWSIM_ATTR_MAX, - .netnsok = true, -}; +/* MAC80211_HWSIM netlink family */ +static struct genl_family hwsim_genl_family; enum hwsim_multicast_groups { HWSIM_MCGRP_CONFIG, @@ -2256,35 +2249,51 @@ static void mac80211_hwsim_get_et_stats(struct ieee80211_hw *hw, WARN_ON(i != MAC80211_HWSIM_SSTATS_LEN); } +#define HWSIM_COMMON_OPS \ + .tx = mac80211_hwsim_tx, \ + .start = mac80211_hwsim_start, \ + .stop = mac80211_hwsim_stop, \ + .add_interface = mac80211_hwsim_add_interface, \ + .change_interface = mac80211_hwsim_change_interface, \ + .remove_interface = mac80211_hwsim_remove_interface, \ + .config = mac80211_hwsim_config, \ + .configure_filter = mac80211_hwsim_configure_filter, \ + .bss_info_changed = mac80211_hwsim_bss_info_changed, \ + .sta_add = mac80211_hwsim_sta_add, \ + .sta_remove = mac80211_hwsim_sta_remove, \ + .sta_notify = mac80211_hwsim_sta_notify, \ + .set_tim = mac80211_hwsim_set_tim, \ + .conf_tx = mac80211_hwsim_conf_tx, \ + .get_survey = mac80211_hwsim_get_survey, \ + CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) \ + .ampdu_action = mac80211_hwsim_ampdu_action, \ + .flush = mac80211_hwsim_flush, \ + .get_tsf = mac80211_hwsim_get_tsf, \ + .set_tsf = mac80211_hwsim_set_tsf, \ + .get_et_sset_count = mac80211_hwsim_get_et_sset_count, \ + .get_et_stats = mac80211_hwsim_get_et_stats, \ + .get_et_strings = mac80211_hwsim_get_et_strings, + static const struct ieee80211_ops mac80211_hwsim_ops = { - .tx = mac80211_hwsim_tx, - .start = mac80211_hwsim_start, - .stop = mac80211_hwsim_stop, - .add_interface = mac80211_hwsim_add_interface, - .change_interface = mac80211_hwsim_change_interface, - .remove_interface = mac80211_hwsim_remove_interface, - .config = mac80211_hwsim_config, - .configure_filter = mac80211_hwsim_configure_filter, - .bss_info_changed = mac80211_hwsim_bss_info_changed, - .sta_add = mac80211_hwsim_sta_add, - .sta_remove = mac80211_hwsim_sta_remove, - .sta_notify = mac80211_hwsim_sta_notify, - .set_tim = mac80211_hwsim_set_tim, - .conf_tx = mac80211_hwsim_conf_tx, - .get_survey = mac80211_hwsim_get_survey, - CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) - .ampdu_action = mac80211_hwsim_ampdu_action, + HWSIM_COMMON_OPS .sw_scan_start = mac80211_hwsim_sw_scan, .sw_scan_complete = mac80211_hwsim_sw_scan_complete, - .flush = mac80211_hwsim_flush, - .get_tsf = mac80211_hwsim_get_tsf, - .set_tsf = mac80211_hwsim_set_tsf, - .get_et_sset_count = mac80211_hwsim_get_et_sset_count, - .get_et_stats = mac80211_hwsim_get_et_stats, - .get_et_strings = mac80211_hwsim_get_et_strings, }; -static struct ieee80211_ops mac80211_hwsim_mchan_ops; +static const struct ieee80211_ops mac80211_hwsim_mchan_ops = { + HWSIM_COMMON_OPS + .hw_scan = mac80211_hwsim_hw_scan, + .cancel_hw_scan = mac80211_hwsim_cancel_hw_scan, + .sw_scan_start = NULL, + .sw_scan_complete = NULL, + .remain_on_channel = mac80211_hwsim_roc, + .cancel_remain_on_channel = mac80211_hwsim_croc, + .add_chanctx = mac80211_hwsim_add_chanctx, + .remove_chanctx = mac80211_hwsim_remove_chanctx, + .change_chanctx = mac80211_hwsim_change_chanctx, + .assign_vif_chanctx = mac80211_hwsim_assign_vif_chanctx, + .unassign_vif_chanctx = mac80211_hwsim_unassign_vif_chanctx, +}; struct hwsim_new_radio_params { unsigned int channels; @@ -2791,7 +2800,6 @@ static void mac80211_hwsim_free(void) static const struct net_device_ops hwsim_netdev_ops = { .ndo_start_xmit = hwsim_mon_xmit, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -3236,6 +3244,18 @@ static const struct genl_ops hwsim_ops[] = { }, }; +static struct genl_family hwsim_genl_family __ro_after_init = { + .name = "MAC80211_HWSIM", + .version = 1, + .maxattr = HWSIM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = hwsim_ops, + .n_ops = ARRAY_SIZE(hwsim_ops), + .mcgrps = hwsim_mcgrps, + .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), +}; + static void destroy_radio(struct work_struct *work) { struct mac80211_hwsim_data *data = @@ -3283,15 +3303,13 @@ static struct notifier_block hwsim_netlink_notifier = { .notifier_call = mac80211_hwsim_netlink_notify, }; -static int hwsim_init_netlink(void) +static int __init hwsim_init_netlink(void) { int rc; printk(KERN_INFO "mac80211_hwsim: initializing netlink\n"); - rc = genl_register_family_with_ops_groups(&hwsim_genl_family, - hwsim_ops, - hwsim_mcgrps); + rc = genl_register_family(&hwsim_genl_family); if (rc) goto failure; @@ -3360,21 +3378,6 @@ static int __init init_mac80211_hwsim(void) if (channels < 1) return -EINVAL; - mac80211_hwsim_mchan_ops = mac80211_hwsim_ops; - mac80211_hwsim_mchan_ops.hw_scan = mac80211_hwsim_hw_scan; - mac80211_hwsim_mchan_ops.cancel_hw_scan = mac80211_hwsim_cancel_hw_scan; - mac80211_hwsim_mchan_ops.sw_scan_start = NULL; - mac80211_hwsim_mchan_ops.sw_scan_complete = NULL; - mac80211_hwsim_mchan_ops.remain_on_channel = mac80211_hwsim_roc; - mac80211_hwsim_mchan_ops.cancel_remain_on_channel = mac80211_hwsim_croc; - mac80211_hwsim_mchan_ops.add_chanctx = mac80211_hwsim_add_chanctx; - mac80211_hwsim_mchan_ops.remove_chanctx = mac80211_hwsim_remove_chanctx; - mac80211_hwsim_mchan_ops.change_chanctx = mac80211_hwsim_change_chanctx; - mac80211_hwsim_mchan_ops.assign_vif_chanctx = - mac80211_hwsim_assign_vif_chanctx; - mac80211_hwsim_mchan_ops.unassign_vif_chanctx = - mac80211_hwsim_unassign_vif_chanctx; - spin_lock_init(&hwsim_radio_lock); err = register_pernet_device(&hwsim_net_ops); diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index 8541cbed786d..e3500203715c 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -945,7 +945,6 @@ static const struct net_device_ops lbs_netdev_ops = { .ndo_start_xmit = lbs_hard_start_xmit, .ndo_set_mac_address = lbs_set_mac_address, .ndo_set_rx_mode = lbs_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 4e0c5653054b..236f790e097a 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -1362,11 +1362,13 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) if (rt2x00dev->bcn->limit > 0) rt2x00dev->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC) | - BIT(NL80211_IFTYPE_AP) | #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif - BIT(NL80211_IFTYPE_WDS); +#ifdef CONFIG_WIRELESS_WDS + BIT(NL80211_IFTYPE_WDS) | +#endif + BIT(NL80211_IFTYPE_AP); rt2x00dev->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 0881ba8535f4..4fdc7223c894 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -272,7 +272,6 @@ static const struct net_device_ops ray_netdev_ops = { .ndo_set_config = ray_dev_config, .ndo_get_stats = ray_get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 471521a0db7b..9f39c6cf98fb 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6086,6 +6086,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) ieee80211_hw_set(wl->hw, SUPPORTS_DYNAMIC_PS); ieee80211_hw_set(wl->hw, SIGNAL_DBM); ieee80211_hw_set(wl->hw, SUPPORTS_PS); + ieee80211_hw_set(wl->hw, SUPPORTS_TX_FRAG); wl->hw->wiphy->cipher_suites = cipher_suites; wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 932f3f81e8cf..d9d29ab88184 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1853,7 +1853,6 @@ static const struct net_device_ops wl3501_netdev_ops = { .ndo_stop = wl3501_close, .ndo_start_xmit = wl3501_hard_start_xmit, .ndo_tx_timeout = wl3501_tx_timeout, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c index dea049b2556f..de7ff395977a 100644 --- a/drivers/net/wireless/zydas/zd1201.c +++ b/drivers/net/wireless/zydas/zd1201.c @@ -1724,7 +1724,6 @@ static const struct net_device_ops zd1201_netdev_ops = { .ndo_tx_timeout = zd1201_tx_timeout, .ndo_set_rx_mode = zd1201_set_multicast, .ndo_set_mac_address = zd1201_set_mac_address, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 74dc2bf71428..e30ffd29b7e9 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -302,7 +302,7 @@ static int xenvif_close(struct net_device *dev) static int xenvif_change_mtu(struct net_device *dev, int mtu) { struct xenvif *vif = netdev_priv(dev); - int max = vif->can_sg ? 65535 - VLAN_ETH_HLEN : ETH_DATA_LEN; + int max = vif->can_sg ? ETH_MAX_MTU - VLAN_ETH_HLEN : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -471,6 +471,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->tx_queue_len = XENVIF_QUEUE_LENGTH; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU - VLAN_ETH_HLEN; + /* * Initialise a dummy MAC address. We choose the numerically * largest non-broadcast address to prevent the address getting diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 8674e188b697..55a4488633e4 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -889,16 +889,16 @@ static int connect_ctrl_ring(struct backend_info *be) unsigned int evtchn; int err; - err = xenbus_gather(XBT_NIL, dev->otherend, - "ctrl-ring-ref", "%u", &val, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, + "ctrl-ring-ref", "%u", &val); + if (err < 0) goto done; /* The frontend does not have a control ring */ ring_ref = val; - err = xenbus_gather(XBT_NIL, dev->otherend, - "event-channel-ctrl", "%u", &val, NULL); - if (err) { + err = xenbus_scanf(XBT_NIL, dev->otherend, + "event-channel-ctrl", "%u", &val); + if (err < 0) { xenbus_dev_fatal(dev, err, "reading %s/event-channel-ctrl", dev->otherend); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index bf2744e1e3db..e085c8c31cfe 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1329,6 +1329,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netdev->features |= netdev->hw_features; netdev->ethtool_ops = &xennet_ethtool_ops; + netdev->min_mtu = 0; + netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE; SET_NETDEV_DEV(netdev, &dev->dev); np->netdev = netdev; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 86280b7e41f3..9c13381b6966 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -153,7 +153,10 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) s32 ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; - err = ops->adjfreq(ops, ppb); + if (ops->adjfine) + err = ops->adjfine(ops, tx->freq); + else + err = ops->adjfreq(ops, ppb); ptp->dialed_frequency = tx->freq; } else if (tx->modes == 0) { tx->freq = ptp->dialed_frequency; diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 302e626fe6b0..53d43954a974 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -28,7 +28,7 @@ static ssize_t clock_name_show(struct device *dev, struct ptp_clock *ptp = dev_get_drvdata(dev); return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); } -static DEVICE_ATTR(clock_name, 0444, clock_name_show, NULL); +static DEVICE_ATTR_RO(clock_name); #define PTP_SHOW_INT(name, var) \ static ssize_t var##_show(struct device *dev, \ diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index ad17fc5883f6..ac65f12bcd43 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1032,9 +1032,6 @@ static int ctcm_change_mtu(struct net_device *dev, int new_mtu) struct ctcm_priv *priv; int max_bufsize; - if (new_mtu < 576 || new_mtu > 65527) - return -EINVAL; - priv = dev->ml_priv; max_bufsize = priv->channel[CTCM_READ]->max_bufsize; @@ -1123,6 +1120,8 @@ void static ctcm_dev_setup(struct net_device *dev) dev->type = ARPHRD_SLIP; dev->tx_queue_len = 100; dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->min_mtu = 576; + dev->max_mtu = 65527; } /* diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 251db0a02e73..211b31d9f157 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1888,7 +1888,7 @@ lcs_stop_device(struct net_device *dev) rc = lcs_stopcard(card); if (rc) dev_err(&card->dev->dev, - " Shutting down the LCS device failed\n "); + " Shutting down the LCS device failed\n"); return rc; } diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index b0e8ffdf864b..2981024a2438 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -302,8 +302,7 @@ static char *netiucv_printuser(struct iucv_connection *conn) if (memcmp(conn->userdata, iucvMagic_ebcdic, 16)) { tmp_uid[8] = '\0'; tmp_udat[16] = '\0'; - memcpy(tmp_uid, conn->userid, 8); - memcpy(tmp_uid, netiucv_printname(tmp_uid, 8), 8); + memcpy(tmp_uid, netiucv_printname(conn->userid, 8), 8); memcpy(tmp_udat, conn->userdata, 16); EBCASC(tmp_udat, 16); memcpy(tmp_udat, netiucv_printname(tmp_udat, 16), 16); @@ -1429,27 +1428,6 @@ static struct net_device_stats *netiucv_stats (struct net_device * dev) return &priv->stats; } -/** - * netiucv_change_mtu - * @dev: Pointer to interface struct. - * @new_mtu: The new MTU to use for this interface. - * - * Sets MTU of an interface. - * - * Returns 0 on success, -EINVAL if MTU is out of valid range. - * (valid range is 576 .. NETIUCV_MTU_MAX). - */ -static int netiucv_change_mtu(struct net_device * dev, int new_mtu) -{ - IUCV_DBF_TEXT(trace, 3, __func__); - if (new_mtu < 576 || new_mtu > NETIUCV_MTU_MAX) { - IUCV_DBF_TEXT(setup, 2, "given MTU out of valid range\n"); - return -EINVAL; - } - dev->mtu = new_mtu; - return 0; -} - /* * attributes in sysfs */ @@ -1564,21 +1542,21 @@ static ssize_t buffer_write (struct device *dev, struct device_attribute *attr, { struct netiucv_priv *priv = dev_get_drvdata(dev); struct net_device *ndev = priv->conn->netdev; - char *e; - int bs1; + unsigned int bs1; + int rc; IUCV_DBF_TEXT(trace, 3, __func__); if (count >= 39) return -EINVAL; - bs1 = simple_strtoul(buf, &e, 0); + rc = kstrtouint(buf, 0, &bs1); - if (e && (!isspace(*e))) { - IUCV_DBF_TEXT_(setup, 2, "buffer_write: invalid char %02x\n", - *e); + if (rc == -EINVAL) { + IUCV_DBF_TEXT_(setup, 2, "buffer_write: invalid char %s\n", + buf); return -EINVAL; } - if (bs1 > NETIUCV_BUFSIZE_MAX) { + if ((rc == -ERANGE) || (bs1 > NETIUCV_BUFSIZE_MAX)) { IUCV_DBF_TEXT_(setup, 2, "buffer_write: buffer size %d too large\n", bs1); @@ -1987,12 +1965,13 @@ static const struct net_device_ops netiucv_netdev_ops = { .ndo_stop = netiucv_close, .ndo_get_stats = netiucv_stats, .ndo_start_xmit = netiucv_tx, - .ndo_change_mtu = netiucv_change_mtu, }; static void netiucv_setup_netdevice(struct net_device *dev) { dev->mtu = NETIUCV_MTU_DEFAULT; + dev->min_mtu = 576; + dev->max_mtu = NETIUCV_MTU_MAX; dev->destructor = netiucv_free_netdevice; dev->hard_header_len = NETIUCV_HDRLEN; dev->addr_len = 0; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 20cf29613043..e33558313834 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4202,10 +4202,6 @@ int qeth_change_mtu(struct net_device *dev, int new_mtu) sprintf(dbf_text, "%8x", new_mtu); QETH_CARD_TEXT(card, 4, dbf_text); - if (new_mtu < 64) - return -EINVAL; - if (new_mtu > 65535) - return -EINVAL; if ((!qeth_is_supported(card, IPA_IP_FRAGMENTATION)) && (!qeth_mtu_is_valid(card, new_mtu))) return -EINVAL; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index bb27058fa9f0..9c921c2833f1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1107,6 +1107,8 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->ml_priv = card; card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; + card->dev->min_mtu = 64; + card->dev->max_mtu = ETH_MAX_MTU; card->dev->netdev_ops = &qeth_l2_netdev_ops; card->dev->ethtool_ops = (card->info.type != QETH_CARD_TYPE_OSN) ? diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 272d9e7419be..ac37d050e765 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3140,6 +3140,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->ml_priv = card; card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; + card->dev->min_mtu = 64; + card->dev->max_mtu = ETH_MAX_MTU; card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 68a5c347fae9..845affa112f7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1368,13 +1368,8 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { { .name = "events", /* not really used - see ID discussion below */ }, }; -static struct genl_family pmcraid_event_family = { - /* - * Due to prior multicast group abuse (the code having assumed that - * the family ID can be used as a multicast group ID) we need to - * statically allocate a family (and thus group) ID. - */ - .id = GENL_ID_PMCRAID, +static struct genl_family pmcraid_event_family __ro_after_init = { + .module = THIS_MODULE, .name = "pmcraid", .version = 1, .maxattr = PMCRAID_AEN_ATTR_MAX, @@ -1389,7 +1384,7 @@ static struct genl_family pmcraid_event_family = { * 0 if the pmcraid_event_family is successfully registered * with netlink generic, non-zero otherwise */ -static int pmcraid_netlink_init(void) +static int __init pmcraid_netlink_init(void) { int result; diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index d02e3e31ed29..8130dfe89745 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -259,17 +259,6 @@ static int cvm_oct_common_change_mtu(struct net_device *dev, int new_mtu) #endif int mtu_overhead = ETH_HLEN + ETH_FCS_LEN + vlan_bytes; - /* - * Limit the MTU to make sure the ethernet packets are between - * 64 bytes and 65535 bytes. - */ - if ((new_mtu + mtu_overhead < VLAN_ETH_ZLEN) || - (new_mtu + mtu_overhead > OCTEON_MAX_MTU)) { - pr_err("MTU must be between %d and %d.\n", - VLAN_ETH_ZLEN - mtu_overhead, - OCTEON_MAX_MTU - mtu_overhead); - return -EINVAL; - } dev->mtu = new_mtu; if ((interface < 2) && @@ -457,7 +446,7 @@ int cvm_oct_common_init(struct net_device *dev) dev->ethtool_ops = &cvm_oct_ethtool_ops; cvm_oct_set_mac_filter(dev); - dev->netdev_ops->ndo_change_mtu(dev, dev->mtu); + dev_set_mtu(dev, dev->mtu); /* * Zero out stats for port so we won't mistakenly show @@ -685,6 +674,11 @@ static int cvm_oct_probe(struct platform_device *pdev) int fau = FAU_NUM_PACKET_BUFFERS_TO_FREE; int qos; struct device_node *pip; + int mtu_overhead = ETH_HLEN + ETH_FCS_LEN; + +#if IS_ENABLED(CONFIG_VLAN_8021Q) + mtu_overhead += VLAN_HLEN; +#endif octeon_mdiobus_force_mod_depencency(); @@ -783,6 +777,8 @@ static int cvm_oct_probe(struct platform_device *pdev) strcpy(dev->name, "pow%d"); for (qos = 0; qos < 16; qos++) skb_queue_head_init(&priv->tx_free_list[qos]); + dev->min_mtu = VLAN_ETH_ZLEN - mtu_overhead; + dev->max_mtu = OCTEON_MAX_MTU - mtu_overhead; if (register_netdev(dev) < 0) { pr_err("Failed to register ethernet device for POW\n"); @@ -836,6 +832,8 @@ static int cvm_oct_probe(struct platform_device *pdev) for (qos = 0; qos < cvmx_pko_get_num_queues(port); qos++) cvmx_fau_atomic_write32(priv->fau + qos * 4, 0); + dev->min_mtu = VLAN_ETH_ZLEN - mtu_overhead; + dev->max_mtu = OCTEON_MAX_MTU - mtu_overhead; switch (priv->imode) { /* These types don't support ports to IPD/PKO */ diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c index d976e5e18d50..c9c9821cfc32 100644 --- a/drivers/staging/rtl8188eu/os_dep/mon.c +++ b/drivers/staging/rtl8188eu/os_dep/mon.c @@ -145,7 +145,6 @@ static netdev_tx_t mon_xmit(struct sk_buff *skb, struct net_device *dev) static const struct net_device_ops mon_netdev_ops = { .ndo_start_xmit = mon_xmit, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 4c30eea45f89..5f53fbd565ef 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -2545,7 +2545,6 @@ static const struct net_device_ops rtl8192_netdev_ops = { .ndo_set_rx_mode = _rtl92e_set_multicast, .ndo_set_mac_address = _rtl92e_set_mac_adr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_start_xmit = rtllib_xmit, }; diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 457eeb5f5239..fdb03dccb449 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -4930,7 +4930,6 @@ static const struct net_device_ops rtl8192_netdev_ops = { .ndo_set_rx_mode = r8192_set_multicast, .ndo_set_mac_address = r8192_set_mac_adr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_start_xmit = ieee80211_xmit, }; diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c index 062307ad7fed..2802b900f8ee 100644 --- a/drivers/staging/slicoss/slicoss.c +++ b/drivers/staging/slicoss/slicoss.c @@ -2880,7 +2880,6 @@ static const struct net_device_ops slic_netdev_ops = { .ndo_get_stats = slic_get_stats, .ndo_set_rx_mode = slic_mcast_set_list, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, }; static u32 slic_card_locate(struct adapter *adapter) diff --git a/drivers/staging/unisys/include/iochannel.h b/drivers/staging/unisys/include/iochannel.h index cba4433bcd51..9081b3f8779c 100644 --- a/drivers/staging/unisys/include/iochannel.h +++ b/drivers/staging/unisys/include/iochannel.h @@ -113,12 +113,10 @@ enum net_types { }; -#define ETH_HEADER_SIZE 14 /* size of ethernet header */ - #define ETH_MIN_DATA_SIZE 46 /* minimum eth data size */ -#define ETH_MIN_PACKET_SIZE (ETH_HEADER_SIZE + ETH_MIN_DATA_SIZE) +#define ETH_MIN_PACKET_SIZE (ETH_HLEN + ETH_MIN_DATA_SIZE) -#define ETH_MAX_MTU 16384 /* maximum data size */ +#define VISOR_ETH_MAX_MTU 16384 /* maximum data size */ #ifndef MAX_MACADDR_LEN #define MAX_MACADDR_LEN 6 /* number of bytes in MAC address */ @@ -288,7 +286,7 @@ struct net_pkt_xmt { int len; /* full length of data in the packet */ int num_frags; /* number of fragments in frags containing data */ struct phys_info frags[MAX_PHYS_INFO]; /* physical page information */ - char ethhdr[ETH_HEADER_SIZE]; /* the ethernet header */ + char ethhdr[ETH_HLEN]; /* the ethernet header */ struct { /* these are needed for csum at uisnic end */ u8 valid; /* 1 = struct is valid - else ignore */ @@ -323,7 +321,7 @@ struct net_pkt_xmtdone { */ #define RCVPOST_BUF_SIZE 4032 #define MAX_NET_RCV_CHAIN \ - ((ETH_MAX_MTU + ETH_HEADER_SIZE + RCVPOST_BUF_SIZE - 1) \ + ((VISOR_ETH_MAX_MTU + ETH_HLEN + RCVPOST_BUF_SIZE - 1) \ / RCVPOST_BUF_SIZE) struct net_pkt_rcvpost { diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c index 136700756485..f8a584bf4a77 100644 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@ -791,7 +791,7 @@ visornic_xmit(struct sk_buff *skb, struct net_device *netdev) * pointing to */ firstfraglen = skb->len - skb->data_len; - if (firstfraglen < ETH_HEADER_SIZE) { + if (firstfraglen < ETH_HLEN) { spin_unlock_irqrestore(&devdata->priv_lock, flags); devdata->busy_cnt++; dev_err(&netdev->dev, @@ -864,7 +864,7 @@ visornic_xmit(struct sk_buff *skb, struct net_device *netdev) /* copy ethernet header from first frag into ocmdrsp * - everything else will be pass in frags & DMA'ed */ - memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE); + memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN); /* copy frags info - from skb->data we need to only provide access * beyond eth header */ diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index 825a63a7c0e3..4762d38a720e 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -669,18 +669,6 @@ static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr) return result; } -static int wlan_change_mtu(struct net_device *dev, int new_mtu) -{ - /* 2312 is max 802.11 payload, 20 is overhead, (ether + llc +snap) - and another 8 for wep. */ - if ((new_mtu < 68) || (new_mtu > (2312 - 20 - 8))) - return -EINVAL; - - dev->mtu = new_mtu; - - return 0; -} - static const struct net_device_ops p80211_netdev_ops = { .ndo_init = p80211knetdev_init, .ndo_open = p80211knetdev_open, @@ -690,7 +678,6 @@ static const struct net_device_ops p80211_netdev_ops = { .ndo_do_ioctl = p80211knetdev_do_ioctl, .ndo_set_mac_address = p80211knetdev_set_mac_address, .ndo_tx_timeout = p80211knetdev_tx_timeout, - .ndo_change_mtu = wlan_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -756,6 +743,11 @@ int wlan_setup(struct wlandevice *wlandev, struct device *physdev) wdev->wiphy = wiphy; wdev->iftype = NL80211_IFTYPE_STATION; netdev->ieee80211_ptr = wdev; + netdev->min_mtu = 68; + /* 2312 is max 802.11 payload, 20 is overhead, + * (ether + llc + snap) and another 8 for wep. + */ + netdev->max_mtu = (2312 - 20 - 8); netif_stop_queue(netdev); netif_carrier_off(netdev); diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 47562509b489..2b3c8564ace8 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -147,8 +147,8 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { }; /* Our generic netlink family */ -static struct genl_family tcmu_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family tcmu_genl_family __ro_after_init = { + .module = THIS_MODULE, .hdrsize = 0, .name = "TCM-USER", .version = 1, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 226b0b4aced6..911fd964c742 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2163,8 +2163,8 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { { .name = THERMAL_GENL_MCAST_GROUP_NAME, }, }; -static struct genl_family thermal_event_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family thermal_event_genl_family __ro_after_init = { + .module = THIS_MODULE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, @@ -2235,7 +2235,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz, } EXPORT_SYMBOL_GPL(thermal_generate_netlink_event); -static int genetlink_init(void) +static int __init genetlink_init(void) { return genl_register_family(&thermal_event_genl_family); } diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 54cab59e20ed..f3932baed07d 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2711,15 +2711,6 @@ static void gsm_mux_rx_netchar(struct gsm_dlci *dlci, return; } -static int gsm_change_mtu(struct net_device *net, int new_mtu) -{ - struct gsm_mux_net *mux_net = netdev_priv(net); - if ((new_mtu < 8) || (new_mtu > mux_net->dlci->gsm->mtu)) - return -EINVAL; - net->mtu = new_mtu; - return 0; -} - static void gsm_mux_net_init(struct net_device *net) { static const struct net_device_ops gsm_netdev_ops = { @@ -2728,7 +2719,6 @@ static void gsm_mux_net_init(struct net_device *net) .ndo_start_xmit = gsm_mux_net_start_xmit, .ndo_tx_timeout = gsm_mux_net_tx_timeout, .ndo_get_stats = gsm_mux_net_get_stats, - .ndo_change_mtu = gsm_change_mtu, }; net->netdev_ops = &gsm_netdev_ops; @@ -2787,6 +2777,8 @@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) return -ENOMEM; } net->mtu = dlci->gsm->mtu; + net->min_mtu = 8; + net->max_mtu = dlci->gsm->mtu; mux_net = netdev_priv(net); mux_net->dlci = dlci; kref_init(&mux_net->ref); diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index c13e27ecb0b7..415885c56435 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -7973,7 +7973,6 @@ static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 7aca2d4670e4..8267bcf2405e 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1768,7 +1768,6 @@ static void hdlcdev_rx(struct slgt_info *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index dec156586de1..d66620f7eaa3 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1887,7 +1887,6 @@ static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index 0473d619d5bf..b4058f0000e4 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -261,19 +261,10 @@ out: return NETDEV_TX_OK; } -static int pn_net_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops pn_netdev_ops = { .ndo_open = pn_net_open, .ndo_stop = pn_net_close, .ndo_start_xmit = pn_net_xmit, - .ndo_change_mtu = pn_net_mtu, }; static void pn_net_setup(struct net_device *dev) @@ -282,6 +273,8 @@ static void pn_net_setup(struct net_device *dev) dev->type = ARPHRD_PHONET; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = PHONET_DEV_MTU; + dev->min_mtu = PHONET_MIN_MTU; + dev->max_mtu = PHONET_MAX_MTU; dev->hard_header_len = 1; dev->dev_addr[0] = PN_MEDIA_USB; dev->addr_len = 1; diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 5d1bd13a56c1..84a1709e0784 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -142,15 +142,6 @@ static inline int qlen(struct usb_gadget *gadget, unsigned qmult) /* NETWORK DRIVER HOOKUP (to the layer above this driver) */ -static int ueth_change_mtu(struct net_device *net, int new_mtu) -{ - if (new_mtu <= ETH_HLEN || new_mtu > GETHER_MAX_ETH_FRAME_LEN) - return -ERANGE; - net->mtu = new_mtu; - - return 0; -} - static void eth_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *p) { struct eth_dev *dev = netdev_priv(net); @@ -729,7 +720,6 @@ static const struct net_device_ops eth_netdev_ops = { .ndo_open = eth_open, .ndo_stop = eth_stop, .ndo_start_xmit = eth_start_xmit, - .ndo_change_mtu = ueth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -792,6 +782,10 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->ethtool_ops = &ops; + /* MTU range: 14 - 15412 */ + net->min_mtu = ETH_HLEN; + net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); SET_NETDEV_DEVTYPE(net, &gadget_type); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 1e6e227134d7..0643ae44f342 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -16,11 +16,7 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; -static struct genl_family family = { - .id = GENL_ID_GENERATE, - .name = DLM_GENL_NAME, - .version = DLM_GENL_VERSION, -}; +static struct genl_family family; static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) { @@ -76,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = { }, }; +static struct genl_family family __ro_after_init = { + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, + .ops = dlm_nl_ops, + .n_ops = ARRAY_SIZE(dlm_nl_ops), + .module = THIS_MODULE, +}; + int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family(&family); } void dlm_netlink_exit(void) diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..8c9fb29c6673 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -118,7 +118,7 @@ again: return ret; } -static int open_related_ns(struct ns_common *ns, +int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 8b252673d454..e99b1a72d9a7 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -12,14 +12,8 @@ static const struct genl_multicast_group quota_mcgrps[] = { }; /* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. - */ - .id = GENL_ID_VFS_DQUOT, +static struct genl_family quota_genl_family __ro_after_init = { + .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h deleted file mode 100644 index 2383dd20ff43..000000000000 --- a/include/dt-bindings/net/mscc-phy-vsc8531.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Device Tree constants for Microsemi VSC8531 PHY - * - * Author: Nagaraju Lakkaraju - * - * License: Dual MIT/GPL - * Copyright (c) 2016 Microsemi Corporation - */ - -#ifndef _DT_BINDINGS_MSCC_VSC8531_H -#define _DT_BINDINGS_MSCC_VSC8531_H - -/* MAC interface Edge rate control VDDMAC in milli Volts */ -#define MSCC_VDDMAC_3300 3300 -#define MSCC_VDDMAC_2500 2500 -#define MSCC_VDDMAC_1800 1800 -#define MSCC_VDDMAC_1500 1500 -#define MSCC_VDDMAC_MAX 4 -#define MSCC_SLOWDOWN_MAX 8 - -#endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c201017b5730..01c1487277b2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -234,6 +234,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); +void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -303,6 +304,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) return ERR_PTR(-EOPNOTSUPP); } +static inline void bpf_prog_sub(struct bpf_prog *prog, int i) +{ +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } @@ -319,6 +324,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..ac5b393ee6b2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,13 +23,13 @@ struct bpf_reg_state { * result in a bad access. */ u64 min_value, max_value; + u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; /* valid when type == PTR_TO_PACKET* */ struct { - u32 id; u16 off; u16 range; }; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index e3354b74286c..848dc508ef57 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,11 +13,13 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 @@ -55,6 +57,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 + /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) #define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff) @@ -105,11 +108,14 @@ #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 /* * Broadcom LED source encodings. These are used in BCM5461, BCM5481, @@ -189,6 +195,12 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BCM54810 Registers */ +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) +#define BCM54810_SHD_CLK_CTL 0x3 +#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..bf1907d96097 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). */ -static inline const struct file_operations *debugfs_real_fops(struct file *filp) +static inline const struct file_operations * +debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index c934d3a96b5e..2896f93808ae 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -67,7 +67,7 @@ * genl_magic_func.h * generates an entry in the static genl_ops array, * and static register/unregister functions to - * genl_register_family_with_ops(). + * genl_register_family(). * * flags and handler: * GENL_op_init( .doit = x, .dumpit = y, .flags = something) diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 9a79f0106da1..32c22cfb238b 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -26,7 +26,6 @@ #ifdef __KERNEL__ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); -int fddi_change_mtu(struct net_device *dev, int new_mtu); struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 667c31101b8b..377257d8f7e3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -259,16 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * {{{2 */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) -static struct genl_family ZZZ_genl_family __read_mostly = { - .id = GENL_ID_GENERATE, - .name = __stringify(GENL_MAGIC_FAMILY), - .version = GENL_MAGIC_VERSION, -#ifdef GENL_MAGIC_FAMILY_HDRSZ - .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), -#endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, -}; - +static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper @@ -302,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) +static struct genl_family ZZZ_genl_family __ro_after_init = { + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .ops = ZZZ_genl_ops, + .n_ops = ARRAY_SIZE(ZZZ_genl_ops), + .mcgrps = ZZZ_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), + .module = THIS_MODULE, +}; + int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) { - return genl_register_family_with_ops_groups(&ZZZ_genl_family, \ - ZZZ_genl_ops, \ - ZZZ_genl_mcgrps); + return genl_register_family(&ZZZ_genl_family); } void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e31bcd4c7859..97585d9679f3 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -93,8 +93,6 @@ static __inline__ void debug_frame(const struct sk_buff *skb) int hdlc_open(struct net_device *dev); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev); -/* May be used by hardware driver */ -int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 8ec23fb0b412..402f99e328d4 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,6 @@ struct hippi_cb { }; __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -int hippi_change_mtu(struct net_device *dev, int new_mtu); int hippi_mac_addr(struct net_device *dev, void *p); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); struct net_device *alloc_hippi_dev(int sizeof_priv); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a80516fd65c8..fe849329511a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation { #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 +#define WLAN_AUTH_FILS_SK 4 +#define WLAN_AUTH_FILS_SK_PFS 5 +#define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 @@ -1960,6 +1963,26 @@ enum ieee80211_eid { WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_CAG_NUMBER = 237, + WLAN_EID_AP_CSN = 239, + WLAN_EID_FILS_INDICATION = 240, + WLAN_EID_DILS = 241, + WLAN_EID_FRAGMENT = 242, + WLAN_EID_EXTENSION = 255 +}; + +/* Element ID Extensions for Element ID 255 */ +enum ieee80211_eid_ext { + WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, + WLAN_EID_EXT_FILS_REQ_PARAMS = 2, + WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, + WLAN_EID_EXT_FILS_SESSION = 4, + WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, + WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, + WLAN_EID_EXT_KEY_DELIVERY = 7, + WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, + WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, + WLAN_EID_EXT_FILS_NONCE = 13, }; /* Action category code */ @@ -2073,6 +2096,9 @@ enum ieee80211_key_len { #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 +#define FILS_NONCE_LEN 16 +#define FILS_MAX_KEK_LEN 64 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0649973ee5b..3f95233b2733 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,10 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif struct ctl_table_header *sysctl_header; }; @@ -229,8 +233,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/linux/mii.h b/include/linux/mii.h index 47492c9631b3..1629a0c32679 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -31,7 +31,11 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_get_link_ksettings( + struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_set_link_ksettings( + struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd); extern int mii_check_gmii_support(struct mii_if_info *mii); extern void mii_check_link (struct mii_if_info *mii); extern unsigned int mii_check_media (struct mii_if_info *mii, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 93ebc5e21334..949b24b6c479 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,10 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, }; +enum { + MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -69,8 +73,8 @@ enum mlx5_flow_namespace_type { struct mlx5_flow_table; struct mlx5_flow_group; -struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_handle; struct mlx5_flow_spec { u8 match_criteria_enable; @@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, int max_num_groups, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, @@ -124,21 +130,28 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_flow_act { + u32 action; + u32 flow_tag; + u32 encap_id; +}; + /* Single destination per rule. * Group ID is implied by the match criteria. */ -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest); -void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); - -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest); - -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num); +void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest); + +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d58065..2632cb2caf10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -145,6 +145,12 @@ enum { MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, + MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT = 0x784, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT = 0x785, + MLX5_CMD_OP_CREATE_QOS_PARA_VPORT = 0x786, + MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT = 0x787, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -537,13 +543,27 @@ struct mlx5_ifc_e_switch_cap_bits { struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; - u8 reserved_0[0x1f]; - u8 reserved_1[0x20]; + u8 esw_scheduling[0x1]; + u8 reserved_at_2[0x1e]; + + u8 reserved_at_20[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; - u8 reserved_2[0x10]; + + u8 reserved_at_80[0x10]; u8 packet_pacing_rate_table_size[0x10]; - u8 reserved_3[0x760]; + + u8 esw_element_type[0x10]; + u8 esw_tsar_type[0x10]; + + u8 reserved_at_c0[0x10]; + u8 max_qos_para_vport[0x10]; + + u8 max_tsar_bw_share[0x20]; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { @@ -2333,6 +2353,30 @@ struct mlx5_ifc_sqc_bits { struct mlx5_ifc_wq_bits wq; }; +enum { + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, + SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, +}; + +struct mlx5_ifc_scheduling_context_bits { + u8 element_type[0x8]; + u8 reserved_at_8[0x18]; + + u8 element_attributes[0x20]; + + u8 parent_element_id[0x20]; + + u8 reserved_at_60[0x40]; + + u8 bw_share[0x20]; + + u8 max_average_bw[0x20]; + + u8 reserved_at_e0[0x120]; +}; + struct mlx5_ifc_rqtc_bits { u8 reserved_at_0[0xa0]; @@ -2844,7 +2888,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x200]; + u8 reserved_at_180[0x880]; struct mlx5_ifc_wq_bits wq; }; @@ -2920,6 +2964,29 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3540,6 +3607,39 @@ struct mlx5_ifc_query_special_contexts_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_query_scheduling_element_out_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xc0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + +enum { + SCHEDULING_HIERARCHY_E_SWITCH = 0x2, +}; + +struct mlx5_ifc_query_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4725,6 +4825,43 @@ struct mlx5_ifc_modify_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_modify_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +enum { + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2, +}; + +struct mlx5_ifc_modify_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x20]; + + u8 modify_bitmask[0x20]; + + u8 reserved_at_c0[0x40]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5390,6 +5527,30 @@ struct mlx5_ifc_destroy_sq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +struct mlx5_ifc_destroy_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6017,6 +6178,36 @@ struct mlx5_ifc_create_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_create_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_create_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 33c97dc900f8..1cde0fd53f90 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -55,7 +55,7 @@ struct mlx5_srq_attr { u32 lwm; u32 user_index; u64 db_record; - u64 *pas; + __be64 *pas; }; struct mlx5_core_dev; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf04a46f6d5b..86bacf6a64f0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -468,17 +468,6 @@ static inline void napi_complete(struct napi_struct *n) } /** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - -/** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context * @@ -732,8 +721,8 @@ struct xps_dev_maps { struct rcu_head rcu; struct xps_map __rcu *cpu_map[0]; }; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) +#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 @@ -1456,7 +1445,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1506,6 +1494,8 @@ enum netdev_priv_flags { * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value + * @min_mtu: Interface Minimum MTU value + * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @@ -1673,11 +1663,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -1726,6 +1711,8 @@ struct net_device { unsigned char dma; unsigned int mtu; + unsigned int min_mtu; + unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; @@ -1922,34 +1909,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } -static inline -void netdev_reset_tc(struct net_device *dev) -{ - dev->num_tc = 0; - memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); - memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); -} - -static inline -int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) -{ - if (tc >= dev->num_tc) - return -EINVAL; - - dev->tc_to_txq[tc].count = count; - dev->tc_to_txq[tc].offset = offset; - return 0; -} - -static inline -int netdev_set_num_tc(struct net_device *dev, u8 num_tc) -{ - if (num_tc > TC_MAX_QUEUE) - return -EINVAL; - - dev->num_tc = num_tc; - return 0; -} +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); +void netdev_reset_tc(struct net_device *dev); +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); +int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) @@ -2686,71 +2649,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } -struct skb_csum_offl_spec { - __u16 ipv4_okay:1, - ipv6_okay:1, - encap_okay:1, - ip_options_okay:1, - ext_hdrs_okay:1, - tcp_okay:1, - udp_okay:1, - sctp_okay:1, - vlan_okay:1, - no_encapped_ipv6:1, - no_not_encapped:1; -}; - -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help); - -static inline bool skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return false; - - return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); -} - -static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec) -{ - bool csum_encapped; - - return skb_csum_offload_chk(skb, spec, &csum_encapped, true); -} - -static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .ipv6_okay = 1, - .vlan_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - -static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - .vlan_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3884,12 +3782,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); @@ -3922,17 +3821,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = &(dev)->all_adj_list.lower, \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); @@ -4009,19 +3905,6 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } -/* Map an ethertype into IP protocol if possible */ -static inline int eproto_to_ipproto(int eproto) -{ - switch (eproto) { - case htons(ETH_P_IP): - return IPPROTO_IP; - case htons(ETH_P_IPV6): - return IPPROTO_IPV6; - default: - return -1; - } -} - #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index abc7fdcb9eb1..69230140215b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -49,13 +49,11 @@ struct sock; struct nf_hook_state { unsigned int hook; - int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; - struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; @@ -82,9 +80,8 @@ struct nf_hook_entry { }; static inline void nf_hook_state_init(struct nf_hook_state *p, - struct nf_hook_entry *hook_entry, unsigned int hook, - int thresh, u_int8_t pf, + u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, @@ -92,13 +89,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; - p->thresh = thresh; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; - RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } @@ -152,23 +147,20 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry); /** - * nf_hook_thresh - call a netfilter hook + * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct net *net, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entry *hook_head; int ret = 1; @@ -185,24 +177,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, thresh, - pf, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook, pf, indev, outdev, + sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, hook_head); } rcu_read_unlock(); return ret; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, - struct sock *sk, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *)) -{ - return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); -} - /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). @@ -221,19 +205,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, - struct sk_buff *skb, struct net_device *in, - struct net_device *out, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) -{ - int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); - if (ret == 1) - ret = okfn(net, sk, skb); - return ret; -} - -static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), @@ -242,7 +213,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } @@ -252,7 +223,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); + int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); + if (ret == 1) + ret = okfn(net, sk, skb); + return ret; } /* Call setsockopt() */ diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 83b9a2e0d8d4..8e42253e5d4d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -79,10 +79,12 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +struct ip_set; + /* Extension type */ struct ip_set_ext_type { /* Destroy extension private data (can be NULL) */ - void (*destroy)(void *ext); + void (*destroy)(struct ip_set *set, void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -92,17 +94,6 @@ struct ip_set_ext_type { extern const struct ip_set_ext_type ip_set_extensions[]; -struct ip_set_ext { - u64 packets; - u64 bytes; - u32 timeout; - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; - char *comment; -}; - struct ip_set_counter { atomic64_t bytes; atomic64_t packets; @@ -122,6 +113,15 @@ struct ip_set_skbinfo { u32 skbmarkmask; u32 skbprio; u16 skbqueue; + u16 __pad; +}; + +struct ip_set_ext { + struct ip_set_skbinfo skbinfo; + u64 packets; + u64 bytes; + char *comment; + u32 timeout; }; struct ip_set; @@ -252,6 +252,10 @@ struct ip_set { u8 flags; /* Default timeout value, if enabled */ u32 timeout; + /* Number of elements (vs timeout) */ + u32 elements; + /* Size of the dynamic extensions (vs timeout) */ + size_t ext_size; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ @@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data) */ if (SET_WITH_COMMENT(set)) ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( - ext_comment(data, set)); + set, ext_comment(data, set)); } static inline int @@ -294,104 +298,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); } -static inline void -ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -{ - atomic64_add((long long)bytes, &(counter)->bytes); -} - -static inline void -ip_set_add_packets(u64 packets, struct ip_set_counter *counter) -{ - atomic64_add((long long)packets, &(counter)->packets); -} - -static inline u64 -ip_set_get_bytes(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->bytes); -} - -static inline u64 -ip_set_get_packets(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->packets); -} - -static inline void -ip_set_update_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - if (ext->packets != ULLONG_MAX && - !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { - ip_set_add_bytes(ext->bytes, counter); - ip_set_add_packets(ext->packets, counter); - } - if (flags & IPSET_FLAG_MATCH_COUNTERS) { - mext->packets = ip_set_get_packets(counter); - mext->bytes = ip_set_get_bytes(counter); - } -} - -static inline void -ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; -} -static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) -{ - /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && - nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask), - IPSET_ATTR_PAD)) || - (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); -} - -static inline void -ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext) -{ - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; -} - -static inline bool -ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ @@ -431,6 +337,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); +extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active); static inline int ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) @@ -546,10 +454,8 @@ bitmap_bytes(u32 a, u32 b) #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_comment.h> - -int -ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active); +#include <linux/netfilter/ipset/ip_set_counter.h> +#include <linux/netfilter/ipset/ip_set_skbinfo.h> #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 5e4662a71e01..366d6c0ea04f 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -6,8 +6,8 @@ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF enum { + IPSET_ADD_STORE_PLAIN_TIMEOUT = -1, IPSET_ADD_FAILED = 1, - IPSET_ADD_STORE_PLAIN_TIMEOUT, IPSET_ADD_START_STORED_TIMEOUT, }; diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8d0248525957..8e2bab1e8e90 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb) * The kadt functions don't use the comment extensions in any way. */ static inline void -ip_set_init_comment(struct ip_set_comment *comment, +ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext) { struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; if (unlikely(c)) { + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } @@ -34,16 +35,17 @@ ip_set_init_comment(struct ip_set_comment *comment, return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); + set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } /* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int -ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); @@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) * of the set data anymore. */ static inline void -ip_set_comment_free(struct ip_set_comment *comment) +ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) { struct ip_set_comment_rcu *c; c = rcu_dereference_protected(comment->c, 1); if (unlikely(!c)) return; + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h new file mode 100644 index 000000000000..bb6fba480118 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_counter.h @@ -0,0 +1,75 @@ +#ifndef _IP_SET_COUNTER_H +#define _IP_SET_COUNTER_H + +/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) +{ + atomic64_add((long long)bytes, &(counter)->bytes); +} + +static inline void +ip_set_add_packets(u64 packets, struct ip_set_counter *counter) +{ + atomic64_add((long long)packets, &(counter)->packets); +} + +static inline u64 +ip_set_get_bytes(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->bytes); +} + +static inline u64 +ip_set_get_packets(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->packets); +} + +static inline void +ip_set_update_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + if (ext->packets != ULLONG_MAX && + !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { + ip_set_add_bytes(ext->bytes, counter); + ip_set_add_packets(ext->packets, counter); + } + if (flags & IPSET_FLAG_MATCH_COUNTERS) { + mext->packets = ip_set_get_packets(counter); + mext->bytes = ip_set_get_bytes(counter); + } +} + +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_COUNTER_H */ diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h new file mode 100644 index 000000000000..29d7ef2bc3fa --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h @@ -0,0 +1,46 @@ +#ifndef _IP_SET_SKBINFO_H +#define _IP_SET_SKBINFO_H + +/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + mext->skbinfo = *skbinfo; +} + +static inline bool +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) +{ + /* Send nonzero parameters only */ + return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + nla_put_net64(skb, IPSET_ATTR_SKBMARK, + cpu_to_be64((u64)skbinfo->skbmark << 32 | + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || + (skbinfo->skbprio && + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + cpu_to_be32(skbinfo->skbprio))) || + (skbinfo->skbqueue && + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + cpu_to_be16(skbinfo->skbqueue))); +} + +static inline void +ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext) +{ + *skbinfo = ext->skbinfo; +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_SKBINFO_H */ diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 1d6a935c1ac5..bfb3531fd88a 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_expired(unsigned long *t) +ip_set_timeout_expired(const unsigned long *t) { return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } @@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) } static inline u32 -ip_set_timeout_get(unsigned long *timeout) +ip_set_timeout_get(const unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2ad1a2b289b5..cd4eaf8df445 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include <linux/netdevice.h> #include <linux/static_key.h> +#include <linux/netfilter.h> #include <uapi/linux/netfilter/x_tables.h> /* Test a struct->invflags and a boolean for inequality */ @@ -17,14 +18,9 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data - * @net network namespace through which the action was invoked - * @in: input netdevice - * @out: output netdevice + * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hook: hook number given packet came from - * @family: Actual NFPROTO_* through which the function is invoked - * (helpful when match->family == NFPROTO_UNSPEC) * * Fields written to by extensions: * @@ -38,15 +34,47 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; - struct net *net; - const struct net_device *in, *out; + const struct nf_hook_state *state; int fragoff; unsigned int thoff; - unsigned int hooknum; - u_int8_t family; bool hotdrop; }; +static inline struct net *xt_net(const struct xt_action_param *par) +{ + return par->state->net; +} + +static inline struct net_device *xt_in(const struct xt_action_param *par) +{ + return par->state->in; +} + +static inline const char *xt_inname(const struct xt_action_param *par) +{ + return par->state->in->name; +} + +static inline struct net_device *xt_out(const struct xt_action_param *par) +{ + return par->state->out; +} + +static inline const char *xt_outname(const struct xt_action_param *par) +{ + return par->state->out->name; +} + +static inline unsigned int xt_hooknum(const struct xt_action_param *par) +{ + return par->state->hook; +} + +static inline u_int8_t xt_family(const struct xt_action_param *par) +{ + return par->state->pf; +} + /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 33e37fb41d5d..2dc3b49b804a 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state); + return nf_hook_slow(skb, &state, e); } static inline void nf_hook_ingress_init(struct net_device *dev) diff --git a/include/linux/phy.h b/include/linux/phy.h index e25f1830fbcf..9880d73a2c3d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,6 +25,7 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/mod_devicetable.h> +#include <linux/phy_led_triggers.h> #include <linux/atomic.h> @@ -85,6 +86,21 @@ typedef enum { } phy_interface_t; /** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and + * fills the speeds * buffer with the supported speeds. If speeds buffer is + * too small to contain * all currently supported speeds, will return as + * many speeds as can fit. + */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size); + +/** * It maps 'enum phy_interface_t' found in include/linux/phy.h * into the device tree binding of 'phy-mode', so that Ethernet * device driver can get phy interface from device tree. @@ -343,7 +359,7 @@ struct phy_c45_device_ids { * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine - * phy_queue: A work_queue for the interrupt + * phy_queue: A work_queue for the phy_mac_interrupt * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. @@ -405,6 +421,12 @@ struct phy_device { int link_timeout; +#ifdef CONFIG_LED_TRIGGER_PHY + struct phy_led_trigger *phy_led_triggers; + unsigned int phy_num_led_triggers; + struct phy_led_trigger *last_triggered; +#endif + /* * Interrupt number for this PHY * -1 means no interrupt @@ -764,6 +786,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); @@ -802,7 +825,8 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct work_struct *work); +void phy_change(struct phy_device *phydev); +void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h new file mode 100644 index 000000000000..a2daea0a37d2 --- /dev/null +++ b/include/linux/phy_led_triggers.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2016 National Instruments Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __PHY_LED_TRIGGERS +#define __PHY_LED_TRIGGERS + +struct phy_device; + +#ifdef CONFIG_LED_TRIGGER_PHY + +#include <linux/leds.h> + +#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 +#define PHY_MII_BUS_ID_SIZE (20 - 3) + +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ + FIELD_SIZEOF(struct mdio_device, addr)+\ + PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) + +struct phy_led_trigger { + struct led_trigger trigger; + char name[PHY_LINK_LED_TRIGGER_NAME_SIZE]; + unsigned int speed; +}; + + +extern int phy_led_triggers_register(struct phy_device *phy); +extern void phy_led_triggers_unregister(struct phy_device *phy); +extern void phy_led_trigger_change_speed(struct phy_device *phy); + +#else + +static inline int phy_led_triggers_register(struct phy_device *phy) +{ + return 0; +} +static inline void phy_led_triggers_unregister(struct phy_device *phy) { } +static inline void phy_led_trigger_change_speed(struct phy_device *phy) { } + +#endif + +#endif diff --git a/include/linux/pim.h b/include/linux/pim.h index e1d756f81348..0e81b2778ae0 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PIM_H #define __LINUX_PIM_H +#include <linux/skbuff.h> #include <asm/byteorder.h> /* Message types - V1 */ @@ -9,24 +10,86 @@ /* Message types - V2 */ #define PIM_VERSION 2 -#define PIM_REGISTER 1 + +/* RFC7761, sec 4.9: + * Type + * Types for specific PIM messages. PIM Types are: + * + * Message Type Destination + * --------------------------------------------------------------------- + * 0 = Hello Multicast to ALL-PIM-ROUTERS + * 1 = Register Unicast to RP + * 2 = Register-Stop Unicast to source of Register + * packet + * 3 = Join/Prune Multicast to ALL-PIM-ROUTERS + * 4 = Bootstrap Multicast to ALL-PIM-ROUTERS + * 5 = Assert Multicast to ALL-PIM-ROUTERS + * 6 = Graft (used in PIM-DM only) Unicast to RPF'(S) + * 7 = Graft-Ack (used in PIM-DM only) Unicast to source of Graft + * packet + * 8 = Candidate-RP-Advertisement Unicast to Domain's BSR + */ +enum { + PIM_TYPE_HELLO, + PIM_TYPE_REGISTER, + PIM_TYPE_REGISTER_STOP, + PIM_TYPE_JOIN_PRUNE, + PIM_TYPE_BOOTSTRAP, + PIM_TYPE_ASSERT, + PIM_TYPE_GRAFT, + PIM_TYPE_GRAFT_ACK, + PIM_TYPE_CANDIDATE_RP_ADV +}; #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) -static inline bool ipmr_pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} +/* RFC7761, sec 4.9: + * The PIM header common to all PIM messages is: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |PIM Ver| Type | Reserved | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct pimhdr { + __u8 type; + __u8 reserved; + __be16 csum; +}; /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ -struct pimreghdr -{ +struct pimreghdr { __u8 type; __u8 reserved; __be16 csum; __be32 flags; }; -struct sk_buff; -extern int pim_rcv_v1(struct sk_buff *); +int pim_rcv_v1(struct sk_buff *skb); + +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + +static inline struct pimhdr *pim_hdr(const struct sk_buff *skb) +{ + return (struct pimhdr *)skb_transport_header(skb); +} + +static inline u8 pim_hdr_version(const struct pimhdr *pimhdr) +{ + return pimhdr->type >> 4; +} + +static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) +{ + return pimhdr->type & 0xf; +} + +/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */ +static inline bool pim_ipv4_all_pim_routers(__be32 addr) +{ + return addr == htonl(0xE000000D); +} #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b97bf2ef996e..368c7ad06ae5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir( return proc_mkdir_data(name, 0, parent, net); } +struct ns_common; +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)); + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 5ad54fc66cf0..b76d47aba564 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -58,7 +58,14 @@ struct system_device_crosststamp; * * clock operations * + * @adjfine: Adjusts the frequency of the hardware clock. + * parameter scaled_ppm: Desired frequency offset from + * nominal frequency in parts per million, but with a + * 16 bit binary fractional field. + * * @adjfreq: Adjusts the frequency of the hardware clock. + * This method is deprecated. New drivers should implement + * the @adjfine method instead. * parameter delta: Desired frequency offset from nominal frequency * in parts per billion * @@ -108,6 +115,7 @@ struct ptp_clock_info { int n_pins; int pps; struct ptp_pin_desc *pin_config; + int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 33c24ebc9b7f..9755a3feb52e 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -22,7 +22,8 @@ struct qed_dev_eth_info { u8 num_tc; u8 port_mac[ETH_ALEN]; - u8 num_vlan_filters; + u16 num_vlan_filters; + u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ bool is_legacy; @@ -129,7 +130,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; - void (*force_mac) (void *dev, u8 *mac); + void (*force_mac) (void *dev, u8 *mac, bool forced); }; #ifdef CONFIG_DCB diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8978a60371f4..ea095b4893aa 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -267,6 +267,9 @@ struct qed_dev_info { u8 mf_mode; bool tx_switching; bool rdma_supported; + u16 mtu; + + bool wol_support; }; enum qed_sb_type { @@ -401,6 +404,15 @@ struct qed_selftest_ops { * @return 0 on success, error otherwise. */ int (*selftest_clock)(struct qed_dev *cdev); + +/** + * @brief selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_nvram) (struct qed_dev *cdev); }; struct qed_common_ops { @@ -554,6 +566,41 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief update_drv_state - API to inform the change in the driver state. + * + * @param cdev + * @param active + * + */ + int (*update_drv_state)(struct qed_dev *cdev, bool active); + +/** + * @brief update_mac - API to inform the change in the mac address + * + * @param cdev + * @param mac + * + */ + int (*update_mac)(struct qed_dev *cdev, u8 *mac); + +/** + * @brief update_mtu - API to inform the change in the mtu + * + * @param cdev + * @param mtu + * + */ + int (*update_mtu)(struct qed_dev *cdev, u16 mtu); + +/** + * @brief update_wol - update of changes in the WoL configuration + * + * @param cdev + * @param enabled - true iff WoL should be enabled. + */ + int (*update_wol) (struct qed_dev *cdev, bool enabled); }; #define MASK_FIELD(_name, _value) \ diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include <uapi/linux/seg6.h> + +#endif diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include <uapi/linux/seg6_genl.h> + +#endif diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include <uapi/linux/seg6_hmac.h> + +#endif diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include <uapi/linux/seg6_iptunnel.h> + +#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32810f279f8e..a4aeeca7e805 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1087,7 +1087,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); -u32 __skb_get_hash_symmetric(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); @@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index e302c447e057..129bc674dcf5 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -39,6 +39,7 @@ struct smc91x_platdata { unsigned long flags; unsigned char leda; unsigned char ledb; + bool pxa_u16_align4; /* PXA buggy u16 writes on 4*n+2 addresses */ }; #endif /* __SMC91X_H__ */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 705840e0438f..3537fb33cc90 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,8 +135,6 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void (*suspend)(struct platform_device *pdev, void *priv); - void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..32a7c7e35b71 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -176,8 +176,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 82f3c912a5b1..d8eae87ea778 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -42,7 +42,6 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfa_act #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 14b51d739c3b..2019310cf135 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -772,6 +772,30 @@ struct cfg80211_csa_settings { }; /** + * struct iface_combination_params - input parameters for interface combinations + * + * Used to pass interface combination parameters + * + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the number of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + * @new_beacon_int: set this to the beacon interval of a new interface + * that's not operating yet, if such is to be checked as part of + * the verification + */ +struct iface_combination_params { + int num_different_channels; + u8 radar_detect; + int iftype_num[NUM_NL80211_IFTYPES]; + u32 new_beacon_int; +}; + +/** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability @@ -1761,9 +1785,11 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication - * @sae_data: Non-IE data to use with SAE or %NULL. This starts with - * Authentication transaction sequence number field. - * @sae_data_len: Length of sae_data buffer in octets + * @auth_data: Fields and elements in Authentication frames. This contains + * the authentication frame body (non-IE and IE data), excluding the + * Authentication algorithm number, i.e., starting at the Authentication + * transaction sequence number field. + * @auth_data_len: Length of auth_data buffer in octets */ struct cfg80211_auth_request { struct cfg80211_bss *bss; @@ -1772,8 +1798,8 @@ struct cfg80211_auth_request { enum nl80211_auth_type auth_type; const u8 *key; u8 key_len, key_idx; - const u8 *sae_data; - size_t sae_data_len; + const u8 *auth_data; + size_t auth_data_len; }; /** @@ -1814,6 +1840,12 @@ enum cfg80211_assoc_req_flags { * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use + * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or + * %NULL if FILS is not used. + * @fils_kek_len: Length of fils_kek in octets + * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association + * Request/Response frame or %NULL if FILS is not used. This field starts + * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1825,6 +1857,9 @@ struct cfg80211_assoc_request { struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; + const u8 *fils_kek; + size_t fils_kek_len; + const u8 *fils_nonces; }; /** @@ -2016,6 +2051,18 @@ struct cfg80211_connect_params { }; /** + * enum cfg80211_connect_params_changed - Connection parameters being updated + * + * This enum provides information of all connect parameters that + * have to be updated as part of update_connect_params() call. + * + * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated + */ +enum cfg80211_connect_params_changed { + UPDATE_ASSOC_IES = BIT(0), +}; + +/** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed @@ -2536,9 +2583,18 @@ struct cfg80211_nan_func { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. Once done, call - * cfg80211_disconnected(). + * @update_connect_params: Update the connect parameters while connected to a + * BSS. The updated parameters can be used by driver/firmware for + * subsequent BSS selection (roaming) decisions and to form the + * Authentication/(Re)Association Request frames. This call does not + * request an immediate disassociation or reassociation with the current + * BSS, i.e., this impacts only subsequent (re)associations. The bits in + * changed are defined in &enum cfg80211_connect_params_changed. * (invoked with the wireless_dev mutex held) + * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if + * connection is in progress. Once done, call cfg80211_disconnected() in + * case connection was already established (invoked with the + * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due @@ -2706,6 +2762,8 @@ struct cfg80211_nan_func { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * + * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2820,6 +2878,10 @@ struct cfg80211_ops { int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); + int (*update_connect_params)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_connect_params *sme, + u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); @@ -2982,6 +3044,10 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); + + int (*set_multicast_to_unicast)(struct wiphy *wiphy, + struct net_device *dev, + const bool enabled); }; /* @@ -3080,6 +3146,12 @@ struct ieee80211_iface_limit { * only in special cases. * @radar_detect_widths: bitmap of channel widths supported for radar detection * @radar_detect_regions: bitmap of regions supported for radar detection + * @beacon_int_min_gcd: This interface combination supports different + * beacon intervals. + * = 0 - all beacon intervals for different interface must be same. + * > 0 - any beacon interval for the interface part of this combination AND + * *GCD* of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -3145,6 +3217,7 @@ struct ieee80211_iface_combination { bool beacon_int_infra_match; u8 radar_detect_widths; u8 radar_detect_regions; + u32 beacon_int_min_gcd; }; struct ieee80211_txrx_stypes { @@ -3752,8 +3825,8 @@ struct cfg80211_cached_keys; * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL - * @p2p_started: true if this is a P2P Device that has been started - * @nan_started: true if this is a NAN interface that has been started + * @is_running: true if this is a non-netdev device that has been started, e.g. + * the P2P Device. * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3785,7 +3858,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started, nan_started; + bool use_4addr, is_running; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -3842,6 +3915,13 @@ static inline u8 *wdev_address(struct wireless_dev *wdev) return wdev->address; } +static inline bool wdev_running(struct wireless_dev *wdev) +{ + if (wdev->netdev) + return netif_running(wdev->netdev); + return wdev->is_running; +} + /** * wdev_priv - return wiphy priv from wireless_dev * @@ -4163,6 +4243,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } /** + * cfg80211_find_ext_ie - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the extended element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data), or a pointer to the first byte of the requested + * element, that is the byte containing the element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. + */ +static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 2); +} + +/** * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI @@ -4562,7 +4663,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * moves to cfg80211 in this call * @buf: authentication frame (header + body) * @len: length of the frame data - * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a. + * @uapsd_queues: bitmap of queues configured for uapsd. Same format + * as the AC bitmap in the QoS info field * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -5598,36 +5700,20 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); * cfg80211_check_combinations - check interface combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. */ int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]); + struct iface_combination_params *params); /** * cfg80211_iter_combinations - iterate over matching combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * @iter: function to call for each matching combination * @data: pointer to pass to iter function * @@ -5636,9 +5722,7 @@ int cfg80211_check_combinations(struct wiphy *wiphy, * purposes. */ int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6965c8f68ade..701fc814d0af 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) @@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, - 0, 0, 0, tunnel_id, flags); + 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } @@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - flags, tunnel_id, md_size); + 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, @@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; - info->key.tp_dst = 0; + info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; @@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, - ip6_flowlabel(ip6h), flags, tunnel_id, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include <net/flow.h> #include <net/rtnetlink.h> +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..6bbbca8af8e3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/atomic.h> #include <net/flow_dissector.h> +#include <linux/uidgid.h> /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -93,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -104,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -131,6 +136,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +182,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d9534927d93b..c4f31666afd2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -128,6 +128,11 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ + FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8d4608ce8716..a34275be3600 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,7 +20,7 @@ struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family idenfitier + * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -39,16 +39,16 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes - * @family_list: family list - * @mcgrps: multicast groups used by this family (private) - * @n_mcgrps: number of multicast groups (private) + * @attrbuf: buffer to store parsed attributes (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family - * @ops: the operations supported by this family (private) - * @n_ops: number of operations supported by this family (private) + * (private) + * @ops: the operations supported by this family + * @n_ops: number of operations supported by this family */ struct genl_family { - unsigned int id; + int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; @@ -64,15 +64,16 @@ struct genl_family { int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ - const struct genl_ops * ops; /* private */ - const struct genl_multicast_group *mcgrps; /* private */ - unsigned int n_ops; /* private */ - unsigned int n_mcgrps; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; + unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ - struct list_head family_list; /* private */ struct module *module; }; +struct nlattr **genl_family_attrbuf(const struct genl_family *family); + /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -130,64 +131,13 @@ struct genl_ops { u8 flags; }; -int __genl_register_family(struct genl_family *family); - -static inline int genl_register_family(struct genl_family *family) -{ - family->module = THIS_MODULE; - return __genl_register_family(family); -} - -/** - * genl_register_family_with_ops - register a generic netlink family with ops - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Return 0 on success or a negative error code. - */ -static inline int -_genl_register_family_with_ops_grps(struct genl_family *family, - const struct genl_ops *ops, size_t n_ops, - const struct genl_multicast_group *mcgrps, - size_t n_mcgrps) -{ - family->module = THIS_MODULE; - family->ops = ops; - family->n_ops = n_ops; - family->mcgrps = mcgrps; - family->n_mcgrps = n_mcgrps; - return __genl_register_family(family); -} - -#define genl_register_family_with_ops(family, ops) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - NULL, 0) -#define genl_register_family_with_ops_groups(family, ops, grps) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - (grps), ARRAY_SIZE(grps)) - -int genl_unregister_family(struct genl_family *family); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +int genl_register_family(struct genl_family *family); +int genl_unregister_family(const struct genl_family *family); +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd); + const struct genl_family *family, int flags, u8 cmd); /** * genlmsg_nlhdr - Obtain netlink header from user specified header @@ -196,8 +146,8 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, * * Returns pointer to netlink header. */ -static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, - struct genl_family *family) +static inline struct nlmsghdr * +genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) { return (struct nlmsghdr *)((char *)user_hdr - family->hdrsize - @@ -233,7 +183,7 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr, - struct genl_family *family) + const struct genl_family *family) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); } @@ -250,7 +200,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, - struct genl_family *family, + const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, @@ -287,7 +237,7 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast_netns(struct genl_family *family, +static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -305,7 +255,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast(struct genl_family *family, +static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -323,7 +273,7 @@ static inline int genlmsg_multicast(struct genl_family *family, * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct genl_family *family, +int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); @@ -407,8 +357,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -static inline int genl_set_err(struct genl_family *family, struct net *net, - u32 portid, u32 group, int code) +static inline int genl_set_err(const struct genl_family *family, + struct net *net, u32 portid, + u32 group, int code) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -416,7 +367,7 @@ static inline int genl_set_err(struct genl_family *family, struct net *net, return netlink_set_err(net->genl_sock, portid, group, code); } -static inline int genl_has_listeners(struct genl_family *family, +static inline int genl_has_listeners(const struct genl_family *family, struct net *net, unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index ba07b9d8ed63..d0e7e3f8e67a 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -333,9 +333,9 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 197a30d221e9..146054ceea8e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,11 +289,6 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk) return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 236a81034fef..c9cff977a7fb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -228,6 +228,7 @@ struct inet_sock { #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) +#define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket diff --git a/include/net/ip.h b/include/net/ip.h index d3a107850a41..ab6761a7c883 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -577,7 +578,8 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -599,7 +601,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0, 0); + ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f83e78d071a3..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..0a3622bf086f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p); + u8 *proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6..82e76fe1c1f7 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -24,11 +24,11 @@ enum { struct lwtunnel_state { __u16 type; __u16 flags; + __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); - int len; - __u16 headroom; + struct rcu_head rcu; __u8 data[0]; }; @@ -36,6 +36,7 @@ struct lwtunnel_encap_ops { int (*build_state)(struct net_device *dev, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts); + void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, @@ -46,10 +47,7 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e2dba93e374f..5345d358a510 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1442,7 +1442,7 @@ enum ieee80211_vif_flags { struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); bool p2p; bool csa_active; bool mu_mimo_owner; @@ -1749,7 +1749,8 @@ struct ieee80211_sta_rates { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid - * if wme is supported. + * if wme is supported. The bits order is like in + * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. * @bandwidth: current bandwidth the station can receive with * @rx_nss: in HT/VHT, the maximum number of spatial streams the @@ -2029,6 +2030,10 @@ struct ieee80211_txq { * drivers, mac80211 packet loss mechanism will not be triggered and driver * is completely depending on firmware event for station kickout. * + * @IEEE80211_HW_SUPPORTS_TX_FRAG: Hardware does fragmentation by itself. + * The stack will not do fragmentation. + * The callback for @set_frag_threshold should be set as well. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2070,6 +2075,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, + IEEE80211_HW_SUPPORTS_TX_FRAG, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3098,8 +3104,9 @@ enum ieee80211_reconfig_type { * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this - * if the device does fragmentation by itself; if this callback is - * implemented then the stack will not do fragmentation. + * if the device does fragmentation by itself. Note that to prevent the + * stack from doing fragmentation IEEE80211_HW_SUPPORTS_TX_FRAG + * should be set as well. * The callback can sleep. * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) @@ -4092,6 +4099,10 @@ void ieee80211_sta_pspoll(struct ieee80211_sta *sta); * This must be used in conjunction with ieee80211_sta_ps_transition() * and possibly ieee80211_sta_pspoll(); calls to all three must be * serialized. + * %IEEE80211_NUM_TIDS can be passed as the tid if the tid is unknown. + * In this case, mac80211 will not check that this tid maps to an AC + * that is trigger enabled and assume that the caller did the proper + * checks. */ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index de629f1520df..2152b70626d5 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 309cd267be4f..a559aa41253c 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); +void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix); #endif /* _NF_LOG_H */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe86c56..09948d10e38e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -12,6 +12,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_state state; + struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 000000000000..f2fc39c97d43 --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,27 @@ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +struct net_device; +struct sk_buff; +struct sock; +struct net; + +static inline bool nf_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + default: + return inet_sk(sk)->transparent; + } +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d79d1e9b9546..311f02739b51 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,27 +14,43 @@ struct nft_pktinfo { struct sk_buff *skb; - struct net *net; - const struct net_device *in; - const struct net_device *out; - u8 pf; - u8 hook; bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; }; +static inline struct net *nft_net(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->net; +} + +static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->hook; +} + +static inline u8 nft_pf(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->pf; +} + +static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->in; +} + +static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->out; +} + static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = pkt->xt.net = state->net; - pkt->in = pkt->xt.in = state->in; - pkt->out = pkt->xt.out = state->out; - pkt->hook = pkt->xt.hooknum = state->hook; - pkt->pf = pkt->xt.family = state->pf; + pkt->xt.state = state; } static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 00f4f6b1b1ba..862373d4ea9d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -1,12 +1,18 @@ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H +extern struct nft_expr_type nft_imm_type; +extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_lookup_type; +extern struct nft_expr_type nft_bitwise_type; +extern struct nft_expr_type nft_byteorder_type; +extern struct nft_expr_type nft_payload_type; +extern struct nft_expr_type nft_dynset_type; +extern struct nft_expr_type nft_range_type; + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); -int nft_immediate_module_init(void); -void nft_immediate_module_exit(void); - struct nft_cmp_fast_expr { u32 data; enum nft_registers sreg:8; @@ -25,24 +31,6 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) extern const struct nft_expr_ops nft_cmp_fast_ops; -int nft_cmp_module_init(void); -void nft_cmp_module_exit(void); - -int nft_range_module_init(void); -void nft_range_module_exit(void); - -int nft_lookup_module_init(void); -void nft_lookup_module_exit(void); - -int nft_dynset_module_init(void); -void nft_dynset_module_exit(void); - -int nft_bitwise_module_init(void); -void nft_bitwise_module_exit(void); - -int nft_byteorder_module_init(void); -void nft_byteorder_module_exit(void); - struct nft_payload { enum nft_payload_bases base:8; u8 offset; @@ -62,7 +50,4 @@ struct nft_payload_set { extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_trace_enabled; -int nft_payload_module_init(void); -void nft_payload_module_exit(void); - #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 000000000000..cbedda077db2 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,31 @@ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +struct nft_fib { + enum nft_registers dreg:8; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index); +#endif diff --git a/include/net/netlink.h b/include/net/netlink.h index 254a0fc01800..a34f53acb6d6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1191,6 +1191,16 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) } /** + * nla_memdup - duplicate attribute memory (kmemdup) + * @src: netlink attribute to duplicate from + * @gfp: GFP mask + */ +static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) +{ + return kmemdup(nla_data(src), nla_len(src), gfp); +} + +/** * nla_nest_start - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e469e85de3f9..3d06d94d2e52 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -91,7 +91,6 @@ struct netns_ct { struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; - u8 label_words; #endif }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9584e9..f1b76b8e6d2d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/sch_generic.h> +#define DEFAULT_TX_QUEUE_LEN 1000 + struct qdisc_walker { int stop; int skip; diff --git a/include/net/raw.h b/include/net/raw.h index 3e789008394d..57c33dd22ec4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -23,6 +23,12 @@ extern struct proto raw_prot; +extern struct raw_hashinfo raw_v4_hashinfo; +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, + unsigned short num, __be32 raddr, + __be32 laddr, int dif); + +int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); int raw_local_deliver(struct sk_buff *, int); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 87783dea0791..cbe4e9de1894 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,13 @@ #include <net/protocol.h> +extern struct raw_hashinfo raw_v6_hashinfo; +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif); + +int raw_abort(struct sock *sk, int err); + void raw6_icmp_error(struct sk_buff *, int nexthdr, u8 type, u8 code, int inner_offset, __be32); bool raw6_local_deliver(struct sk_buff *, int); diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 11c3bf262a85..bd4a3ded7c87 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -530,7 +530,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_abandon:1, /* can chunks from this message can be abandoned. */ can_delay; /* should this message be Nagle delayed */ }; @@ -641,7 +640,6 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ - resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ @@ -656,6 +654,7 @@ struct sctp_chunk { fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; +#define sctp_chunk_retransmitted(chunk) (chunk->sent_count > 1) void sctp_chunk_hold(struct sctp_chunk *); void sctp_chunk_put(struct sctp_chunk *); int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4e0357517d79 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <net/lwtunnel.h> +#include <linux/seg6.h> +#include <linux/rhashtable.h> + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); + +#endif diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/seg6.h> +#include <linux/seg6_hmac.h> +#include <linux/rhashtable.h> + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a..442cbb118a07 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,6 +419,7 @@ struct sock { u32 sk_max_ack_backlog; __u32 sk_priority; __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -914,14 +915,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) #endif } -#define sk_wait_event(__sk, __timeo, __condition) \ +#define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ - *(__timeo) = schedule_timeout(*(__timeo)); \ + *(__timeo) = wait_woken(__wait, \ + TASK_INTERRUPTIBLE, \ + *(__timeo)); \ } \ - sched_annotate_sleep(); \ + sched_annotate_sleep(); \ lock_sock(__sk); \ __rc = __condition; \ __rc; \ @@ -1162,11 +1165,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk) sk->sk_prot->enter_memory_pressure(sk); } -static inline long sk_prot_mem_limits(const struct sock *sk, int index) -{ - return sk->sk_prot->sysctl_mem[index]; -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1276,14 +1274,32 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind); int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +/* We used to have PAGE_SIZE here, but systems with 64KB pages + * do not necessarily have 16x time more memory than 4KB ones. + */ +#define SK_MEM_QUANTUM 4096 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 +/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +static inline long sk_prot_mem_limits(const struct sock *sk, int index) +{ + long val = sk->sk_prot->sysctl_mem[index]; + +#if PAGE_SIZE > SK_MEM_QUANTUM + val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; +#elif PAGE_SIZE < SK_MEM_QUANTUM + val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; +#endif + return val; +} + static inline int sk_mem_pages(int amt) { return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; @@ -1651,6 +1667,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1658,6 +1675,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); @@ -1952,6 +1974,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 62770add15bd..604bc31e23ab 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,13 +8,13 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; int tcfm_ifindex; - int tcfm_ok_push; + bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) -static inline bool is_tcf_mirred_redirect(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) @@ -23,7 +23,7 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } -static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 5767e9dbcf92..19cd3d345804 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ struct tcf_skbedit { u32 flags; u32 priority; u32 mark; + u32 mask; u16 queue_mapping; u16 ptype; }; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 253f8da6c2a6..efef0b4b1b2b 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -12,6 +12,8 @@ #define __NET_TC_TUNNEL_KEY_H #include <net/act_api.h> +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/dst_metadata.h> struct tcf_tunnel_key_params { struct rcu_head rcu; @@ -27,4 +29,39 @@ struct tcf_tunnel_key { #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) +static inline bool is_tcf_tunnel_set(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; +#endif + return false; +} + +static inline bool is_tcf_tunnel_release(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; +#endif + return false; +} + +static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + return ¶ms->tcft_enc_metadata->u.tun_info; +#else + return NULL; +#endif +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 4948790d393d..e6e4e19be387 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -246,6 +246,24 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* net/ipv4/udp.c */ +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} + void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, @@ -258,6 +276,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int udp_init_sock(struct sock *sk); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f09c70b97eca..e2f38e0091b6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,291 +143,301 @@ union bpf_attr { }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - - /** - * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 current failed the cgroup2 descendant test - * == 1 current succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_current_task_under_cgroup, - - /** - * bpf_skb_change_tail(skb, len, flags) - * The helper will resize the skb to the given new size, - * to be used f.e. with control messages. - * @skb: pointer to skb - * @len: new skb length - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_tail, - - /** - * bpf_skb_pull_data(skb, len) - * The helper will pull in non-linear data in case the - * skb is non-linear and not all of len are part of the - * linear section. Only needed for read/write with direct - * packet access. - * @skb: pointer to skb - * @len: len to make read/writeable - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_pull_data, - - /** - * bpf_csum_update(skb, csum) - * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. - * @skb: pointer to skb - * @csum: csum to add - * Return: csum on success or negative error - */ - BPF_FUNC_csum_update, - - /** - * bpf_set_hash_invalid(skb) - * Invalidate current skb>hash. - * @skb: pointer to skb - */ - BPF_FUNC_set_hash_invalid, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 5512c90af7e3..adc899381e0d 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -26,10 +26,11 @@ struct genlmsghdr { /* * List of reserved static generic netlink identifiers: */ -#define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) /************************************************************************** * Controller diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 117d02e0fc31..3e5185e9ef03 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -35,6 +35,9 @@ #define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ #define ETH_FCS_LEN 4 /* Octets in the FCS */ +#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */ +#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */ + /* * These are the defined Ethernet Protocol ID's. */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 509cd961068d..bbe201047df6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -43,6 +43,23 @@ struct inet_diag_req_v2 { struct inet_diag_sockid id; }; +/* + * SOCK_RAW sockets require the underlied protocol to be + * additionally specified so we can use @pad member for + * this, but we can't rename it because userspace programs + * still may depend on this name. Instead lets use another + * structure definition as an alias for struct + * @inet_diag_req_v2. + */ +struct inet_diag_req_raw { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u8 idiag_ext; + __u8 sdiag_raw_protocol; + __u32 idiag_states; + struct inet_diag_sockid id; +}; + enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..53561be1ac21 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,8 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_MAX }; diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 4bd27d0270a2..5daa48e2571e 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -124,8 +124,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ - L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ - L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, __L2TP_ATTR_MAX, }; diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..453cc6215bfd 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index d93f949d1d9a..7550e9176a54 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -13,7 +13,7 @@ #define NF_STOLEN 2 #define NF_QUEUE 3 #define NF_REPEAT 4 -#define NF_STOP 5 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ #define NF_MAX_VERDICT NF_STOP /* we overload the higher bits for encoding auxiliary data such as the queue diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c6c4477c136b..14e5f619167e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -759,6 +759,19 @@ enum nft_meta_keys { }; /** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, +}; + +/** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * * @NFTA_HASH_SREG: source register (NLA_U32) @@ -797,6 +810,20 @@ enum nft_meta_attributes { #define NFTA_META_MAX (__NFTA_META_MAX - 1) /** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + +/** * enum nft_ct_keys - nf_tables ct expression keys * * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) @@ -1109,6 +1136,42 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ +}; + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b4622..259c9c77fdc1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -600,6 +600,20 @@ * * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial * mesh config parameters may be given. * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the @@ -874,6 +888,12 @@ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1069,6 +1089,10 @@ enum nl80211_commands { NL80211_CMD_CHANGE_NAN_CONFIG, NL80211_CMD_NAN_MATCH, + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + + NL80211_CMD_UPDATE_CONNECT_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1638,8 +1662,16 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * - * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts - * with the Authentication transaction sequence number field. + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. * * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -1936,6 +1968,14 @@ enum nl80211_commands { * attribute. * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. + * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2195,7 +2235,7 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, - NL80211_ATTR_SAE_DATA, + NL80211_ATTR_AUTH_DATA, NL80211_ATTR_VHT_CAPABILITY, @@ -2336,6 +2376,11 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2347,6 +2392,7 @@ enum nl80211_attrs { #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA /* * Allow user space programs to use #ifdef on new attributes by defining them @@ -3660,6 +3706,9 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -3672,6 +3721,9 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -4280,6 +4332,9 @@ enum nl80211_iface_limit_attrs { * of supported channel widths for radar detection. * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -4287,8 +4342,8 @@ enum nl80211_iface_limit_attrs { * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 * => allows an AP and a STA that must match BIs * - * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 - * => allows 8 of AP/GO + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 * => allows two STAs on different channels @@ -4314,6 +4369,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_NUM_CHANNELS, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, /* keep last */ NUM_NL80211_IFACE_COMB, @@ -4634,6 +4690,8 @@ enum nl80211_feature_flags { * configuration (AP/mesh) with HT rates. * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate * configuration (AP/mesh) with VHT rates. + * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup + * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4648,6 +4706,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, NL80211_EXT_FEATURE_BEACON_RATE_HT, NL80211_EXT_FEATURE_BEACON_RATE_VHT, + NL80211_EXT_FEATURE_FILS_STA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 59ed3992c760..375d812fea36 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -705,6 +705,15 @@ enum ovs_nat_attr { #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) +/* + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. + * @addresses: Source and destination MAC addresses. + * @eth_type: Ethernet type + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; +}; + /** * enum ovs_action_attr - Action types. * @@ -738,6 +747,10 @@ enum ovs_nat_attr { * is no MPLS label stack, as determined by ethertype, no action is taken. * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related * entries in the flow key. + * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the + * packet. + * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the + * packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -765,6 +778,8 @@ enum ovs_action_attr { * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8fd715f806a2..86786d45ee66 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -447,6 +447,16 @@ enum { TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include <linux/seg6.h> + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 8e7890b26d9a..79d029d25310 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -24,6 +24,8 @@ #define SIOCINQ FIONREAD #define SIOCOUTQ TIOCOUTQ /* output queue size (not sent + not acked) */ +#define SOCK_IOC_TYPE 0x89 + /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ #define SIOCDELRT 0x890C /* delete routing table entry */ @@ -84,6 +86,7 @@ #define SIOCWANDEV 0x894A /* get/set netdev parameters */ #define SIOCOUTQNSD 0x894B /* output queue size (not sent only) */ +#define SIOCGSKNS 0x894C /* get socket network namespace */ /* ARP cache control calls. */ /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index a4d00c608d8f..2884425738ce 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -28,6 +28,7 @@ #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 #define SKBEDIT_F_PTYPE 0x8 +#define SKBEDIT_F_MASK 0x10 struct tc_skbedit { tc_gen; @@ -42,6 +43,7 @@ enum { TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, TCA_SKBEDIT_PTYPE, + TCA_SKBEDIT_MASK, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 890106ff16e6..84ea55e1076b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -33,6 +33,7 @@ enum { TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ TCA_TUNNEL_KEY_PAD, + TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ __TCA_TUNNEL_KEY_MAX, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d98154106..82a04143368e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1043,6 +1043,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 39918402e6e9..045cbe673356 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -13,6 +13,7 @@ #include <linux/rcupdate.h> #include <linux/random.h> #include <linux/smp.h> +#include <linux/topology.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/uidgid.h> @@ -92,6 +93,17 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_numa_node_id) +{ + return numa_node_id(); +} + +const struct bpf_func_proto bpf_get_numa_node_id_proto = { + .func = bpf_get_numa_node_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 1ed8473ec537..2565809fbb34 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -87,6 +87,7 @@ static struct inode *bpf_get_inode(struct super_block *sb, switch (mode & S_IFMT) { case S_IFDIR: case S_IFREG: + case S_IFLNK: break; default: return ERR_PTR(-EINVAL); @@ -119,6 +120,16 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) return 0; } +static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, + struct inode *dir) +{ + d_instantiate(dentry, inode); + dget(dentry); + + dir->i_mtime = current_time(dir); + dir->i_ctime = dir->i_mtime; +} + static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -133,9 +144,7 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(inode); inc_nlink(dir); - d_instantiate(dentry, inode); - dget(dentry); - + bpf_dentry_finalize(dentry, inode, dir); return 0; } @@ -151,9 +160,7 @@ static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, inode->i_op = iops; inode->i_private = dentry->d_fsdata; - d_instantiate(dentry, inode); - dget(dentry); - + bpf_dentry_finalize(dentry, inode, dir); return 0; } @@ -181,13 +188,37 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { if (strchr(dentry->d_name.name, '.')) return ERR_PTR(-EPERM); + return simple_lookup(dir, dentry, flags); } +static int bpf_symlink(struct inode *dir, struct dentry *dentry, + const char *target) +{ + char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); + struct inode *inode; + + if (!link) + return -ENOMEM; + + inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); + if (IS_ERR(inode)) { + kfree(link); + return PTR_ERR(inode); + } + + inode->i_op = &simple_symlink_inode_operations; + inode->i_link = link; + + bpf_dentry_finalize(dentry, inode, dir); + return 0; +} + static const struct inode_operations bpf_dir_iops = { .lookup = bpf_lookup, .mknod = bpf_mkobj, .mkdir = bpf_mkdir, + .symlink = bpf_symlink, .rmdir = simple_rmdir, .rename = simple_rename, .link = simple_link, @@ -324,6 +355,8 @@ static void bpf_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); if (!bpf_inode_type(inode, &type)) bpf_any_put(inode->i_private, type); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 237f3d6a7ddc..233e3ac836a6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -17,6 +17,7 @@ #include <linux/license.h> #include <linux/filter.h> #include <linux/version.h> +#include <linux/kernel.h> DEFINE_PER_CPU(int, bpf_prog_active); @@ -254,12 +255,6 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) return map; } -/* helper to convert user pointers passed inside __aligned_u64 fields */ -static void __user *u64_to_ptr(__u64 val) -{ - return (void __user *) (unsigned long) val; -} - int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -270,8 +265,8 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) static int map_lookup_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *uvalue = u64_to_ptr(attr->value); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); int ufd = attr->map_fd; struct bpf_map *map; void *key, *value, *ptr; @@ -344,8 +339,8 @@ err_put: static int map_update_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *uvalue = u64_to_ptr(attr->value); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; @@ -422,7 +417,7 @@ err_put: static int map_delete_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); + void __user *ukey = u64_to_user_ptr(attr->key); int ufd = attr->map_fd; struct bpf_map *map; struct fd f; @@ -466,8 +461,8 @@ err_put: static int map_get_next_key(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *unext_key = u64_to_ptr(attr->next_key); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *unext_key = u64_to_user_ptr(attr->next_key); int ufd = attr->map_fd; struct bpf_map *map; void *key, *next_key; @@ -682,6 +677,17 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) } EXPORT_SYMBOL_GPL(bpf_prog_add); +void bpf_prog_sub(struct bpf_prog *prog, int i) +{ + /* Only to be used for undoing previous bpf_prog_add() in some + * error path. We still know that another entity in our call + * path holds a reference to the program, thus atomic_sub() can + * be safely used in such cases! + */ + WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); +} +EXPORT_SYMBOL_GPL(bpf_prog_sub); + struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) { return bpf_prog_add(prog, 1); @@ -732,7 +738,7 @@ static int bpf_prog_load(union bpf_attr *attr) return -EINVAL; /* copy eBPF program license from user space */ - if (strncpy_from_user(license, u64_to_ptr(attr->license), + if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) return -EFAULT; license[sizeof(license) - 1] = 0; @@ -762,7 +768,7 @@ static int bpf_prog_load(union bpf_attr *attr) prog->len = attr->insn_cnt; err = -EFAULT; - if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), + if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), prog->len * sizeof(struct bpf_insn)) != 0) goto free_prog; @@ -813,7 +819,7 @@ static int bpf_obj_pin(const union bpf_attr *attr) if (CHECK_ATTR(BPF_OBJ)) return -EINVAL; - return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); + return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); } static int bpf_obj_get(const union bpf_attr *attr) @@ -821,7 +827,7 @@ static int bpf_obj_get(const union bpf_attr *attr) if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) return -EINVAL; - return bpf_obj_get_user(u64_to_ptr(attr->pathname)); + return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); } SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99a7e5b388f2..89f787ca47ef 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19,6 +19,7 @@ #include <net/netlink.h> #include <linux/file.h> #include <linux/vmalloc.h> +#include <linux/stringify.h> /* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. @@ -190,6 +191,22 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; +#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) +static const char * const func_id_str[] = { + __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) +}; +#undef __BPF_FUNC_STR_FN + +static const char *func_id_name(int id) +{ + BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); + + if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) + return func_id_str[id]; + else + return "unknown"; +} + static void print_verifier_state(struct bpf_verifier_state *state) { struct bpf_reg_state *reg; @@ -212,9 +229,10 @@ static void print_verifier_state(struct bpf_verifier_state *state) else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL || t == PTR_TO_MAP_VALUE_ADJ) - verbose("(ks=%d,vs=%d)", + verbose("(ks=%d,vs=%d,id=%u)", reg->map_ptr->key_size, - reg->map_ptr->value_size); + reg->map_ptr->value_size, + reg->id); if (reg->min_value != BPF_REGISTER_MIN_RANGE) verbose(",min_value=%llu", (unsigned long long)reg->min_value); @@ -353,7 +371,8 @@ static void print_bpf_insn(struct bpf_insn *insn) u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { - verbose("(%02x) call %d\n", insn->code, insn->imm); + verbose("(%02x) call %s#%d\n", insn->code, + func_id_name(insn->imm), insn->imm); } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose("(%02x) goto pc%+d\n", insn->code, insn->off); @@ -447,6 +466,7 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; + regs[regno].id = 0; regs[regno].imm = 0; } @@ -1112,8 +1132,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) return 0; error: - verbose("cannot pass map_type %d into func %d\n", - map->map_type, func_id); + verbose("cannot pass map_type %d into func %s#%d\n", + map->map_type, func_id_name(func_id), func_id); return -EINVAL; } @@ -1170,7 +1190,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) /* find function prototype */ if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { - verbose("invalid func %d\n", func_id); + verbose("invalid func %s#%d\n", func_id_name(func_id), func_id); return -EINVAL; } @@ -1178,7 +1198,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) fn = env->prog->aux->ops->get_func_proto(func_id); if (!fn) { - verbose("unknown func %d\n", func_id); + verbose("unknown func %s#%d\n", func_id_name(func_id), func_id); return -EINVAL; } @@ -1198,7 +1218,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id) */ err = check_raw_mode(fn); if (err) { - verbose("kernel subsystem misconfigured func %d\n", func_id); + verbose("kernel subsystem misconfigured func %s#%d\n", + func_id_name(func_id), func_id); return err; } @@ -1252,9 +1273,10 @@ static int check_call(struct bpf_verifier_env *env, int func_id) return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].id = ++env->id_gen; } else { - verbose("unknown return type %d of func %d\n", - fn->ret_type, func_id); + verbose("unknown return type %d of func %s#%d\n", + fn->ret_type, func_id_name(func_id), func_id); return -EINVAL; } @@ -1477,7 +1499,6 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, { struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; - bool min_set = false, max_set = false; u8 opcode = BPF_OP(insn->code); dst_reg = ®s[insn->dst_reg]; @@ -1500,7 +1521,6 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, } else if (insn->imm < BPF_REGISTER_MAX_RANGE && (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { min_val = max_val = insn->imm; - min_set = max_set = true; } /* We don't know anything about what was done to this register, mark it @@ -1644,8 +1664,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } - regs[insn->dst_reg].type = UNKNOWN_VALUE; - regs[insn->dst_reg].map_ptr = NULL; + mark_reg_unknown_value(regs, insn->dst_reg); } } else { /* case: R = imm @@ -1907,6 +1926,38 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(true_reg); } +static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, + enum bpf_reg_type type) +{ + struct bpf_reg_state *reg = ®s[regno]; + + if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { + reg->type = type; + if (type == UNKNOWN_VALUE) + mark_reg_unknown_value(regs, regno); + } +} + +/* The logic is similar to find_good_pkt_pointers(), both could eventually + * be folded together at some point. + */ +static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, + enum bpf_reg_type type) +{ + struct bpf_reg_state *regs = state->regs; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_map_reg(regs, i, regs[regno].id, type); + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, + regs[regno].id, type); + } +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -1994,18 +2045,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - if (opcode == BPF_JEQ) { - /* next fallthrough insn can access memory via - * this register - */ - regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - /* branch targer cannot access it, since reg == 0 */ - mark_reg_unknown_value(other_branch->regs, - insn->dst_reg); - } else { - other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - mark_reg_unknown_value(regs, insn->dst_reg); - } + /* Mark all identical map registers in each branch as either + * safe or unknown depending R == 0 or R != 0 conditional. + */ + mark_map_regs(this_branch, insn->dst_reg, + opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE); + mark_map_regs(other_branch, insn->dst_reg, + opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index cbb387a265db..8a5e44236f78 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -41,12 +41,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; -static struct genl_family family = { - .id = GENL_ID_GENERATE, - .name = TASKSTATS_GENL_NAME, - .version = TASKSTATS_GENL_VERSION, - .maxattr = TASKSTATS_CMD_ATTR_MAX, -}; +static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, @@ -655,6 +650,15 @@ static const struct genl_ops taskstats_ops[] = { }, }; +static struct genl_family family __ro_after_init = { + .name = TASKSTATS_GENL_NAME, + .version = TASKSTATS_GENL_VERSION, + .maxattr = TASKSTATS_CMD_ATTR_MAX, + .module = THIS_MODULE, + .ops = taskstats_ops, + .n_ops = ARRAY_SIZE(taskstats_ops), +}; + /* Needed early in initialization */ void __init taskstats_init_early(void) { @@ -671,7 +675,7 @@ static int __init taskstats_init(void) { int rc; - rc = genl_register_family_with_ops(&family, taskstats_ops); + rc = genl_register_family(&family); if (rc) return rc; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5dcb99281259..fa77311dadb2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -422,6 +422,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: diff --git a/net/802/fddi.c b/net/802/fddi.c index 7d3a0af954e8..6356623fc238 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -int fddi_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(fddi_change_mtu); - static const struct header_ops fddi_header_ops = { .create = fddi_header, }; @@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev) dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ + dev->min_mtu = FDDI_K_SNAP_HLEN; + dev->max_mtu = FDDI_K_SNAP_DLEN; dev->addr_len = FDDI_K_ALEN; dev->tx_queue_len = 100; /* Long queues on FDDI */ dev->flags = IFF_BROADCAST | IFF_MULTICAST; diff --git a/net/802/hippi.c b/net/802/hippi.c index ade1a52cdcff..5e4427beab2b 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -int hippi_change_mtu(struct net_device *dev, int new_mtu) -{ - /* - * HIPPI's got these nice large MTUs. - */ - if ((new_mtu < 68) || (new_mtu > 65280)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(hippi_change_mtu); - /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. @@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev) dev->type = ARPHRD_HIPPI; dev->hard_header_len = HIPPI_HLEN; dev->mtu = 65280; + dev->min_mtu = 68; + dev->max_mtu = 65280; dev->addr_len = HIPPI_ALEN; dev->tx_queue_len = 25 /* 5 */; memset(dev->broadcast, 0xFF, HIPPI_ALEN); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f2531ad66b68..a79365574531 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) return -EFAULT; /* Null terminate this sucker, just in case. */ - args.device1[23] = 0; - args.u.device2[23] = 0; + args.device1[sizeof(args.device1) - 1] = 0; + args.u.device2[sizeof(args.u.device2) - 1] = 0; rtnl_lock(); @@ -571,8 +571,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; - if ((args.u.name_type >= 0) && - (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { + if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { struct vlan_net *vn; vn = net_generic(net, vlan_net_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fbfacd51aa34..10da6c588bf8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -826,5 +826,8 @@ void vlan_setup(struct net_device *dev) dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; + eth_zero_addr(dev->broadcast); } diff --git a/net/atm/br2684.c b/net/atm/br2684.c index aa0047c5c467..fca84e111c89 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -620,14 +620,12 @@ error: static const struct net_device_ops br2684_netdev_ops = { .ndo_start_xmit = br2684_start_xmit, .ndo_set_mac_address = br2684_mac_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; static const struct net_device_ops br2684_netdev_ops_routed = { .ndo_start_xmit = br2684_start_xmit, .ndo_set_mac_address = br2684_mac_addr, - .ndo_change_mtu = eth_change_mtu }; static void br2684_setup(struct net_device *netdev) @@ -651,7 +649,9 @@ static void br2684_setup_routed(struct net_device *netdev) netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */ netdev->netdev_ops = &br2684_netdev_ops_routed; netdev->addr_len = 0; - netdev->mtu = 1500; + netdev->mtu = ETH_DATA_LEN; + netdev->min_mtu = 0; + netdev->max_mtu = ETH_MAX_MTU; netdev->type = ARPHRD_PPP; netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; netdev->tx_queue_len = 100; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5d2693826afb..779b3fa6052d 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, return 0; } -/* shamelessly stolen from drivers/net/net_init.c */ -static int lec_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > 18190)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static void lec_set_multicast_list(struct net_device *dev) { /* @@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_open = lec_open, .ndo_stop = lec_close, .ndo_start_xmit = lec_start_xmit, - .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, .ndo_set_rx_mode = lec_set_multicast_list, }; @@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) if (!dev_lec[i]) return -ENOMEM; dev_lec[i]->netdev_ops = &lec_netdev_ops; + dev_lec[i]->max_mtu = 18190; snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index f20742cbae6d..b73b96a2854b 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -17,7 +17,7 @@ config BATMAN_ADV config BATMAN_ADV_BATMAN_V bool "B.A.T.M.A.N. V protocol (experimental)" - depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m) + depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y) default n help This option enables the B.A.T.M.A.N. V protocol, the successor diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e2d18d0b1f06..bd39247e2f0f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -698,7 +698,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { - batadv_forw_packet_free(forw_packet_aggr); + batadv_forw_packet_free(forw_packet_aggr, true); return; } @@ -1611,7 +1611,7 @@ out: if (hardif_neigh) batadv_hardif_neigh_put(hardif_neigh); - kfree_skb(skb_priv); + consume_skb(skb_priv); } /** @@ -1783,6 +1783,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; + bool dropped = false; delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, @@ -1792,8 +1793,10 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) hlist_del(&forw_packet->list); spin_unlock_bh(&bat_priv->forw_bat_list_lock); - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { + dropped = true; goto out; + } batadv_iv_ogm_emit(forw_packet); @@ -1810,7 +1813,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, dropped); } static int batadv_iv_ogm_receive(struct sk_buff *skb, @@ -1820,17 +1823,18 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_ogm_packet *ogm_packet; u8 *packet_pos; int ogm_offset; - bool ret; + bool res; + int ret = NET_RX_DROP; - ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); - if (!ret) - return NET_RX_DROP; + res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); + if (!res) + goto free_skb; /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface * that does not have B.A.T.M.A.N. IV enabled ? */ if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) - return NET_RX_DROP; + goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, @@ -1851,8 +1855,15 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, ogm_packet = (struct batadv_ogm_packet *)packet_pos; } - kfree_skb(skb); - return NET_RX_SUCCESS; + ret = NET_RX_SUCCESS; + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); + + return ret; } #ifdef CONFIG_BATMAN_ADV_DEBUGFS @@ -2486,7 +2497,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) struct batadv_orig_node *orig_node; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { orig_node = gw_node->orig_node; router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) @@ -2674,7 +2685,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv, " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { /* fails if orig_node has no router */ if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) continue; @@ -2774,7 +2785,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, int idx = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { if (idx++ < idx_skip) continue; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index e79f6f01182e..2ac612d7bab4 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -750,7 +750,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) u32 max_bw = 0, bw; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { if (!kref_get_unless_zero(&gw_node->refcount)) continue; @@ -787,7 +787,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node) { - struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; + struct batadv_gw_node *curr_gw, *orig_gw = NULL; u32 gw_throughput, orig_throughput, threshold; bool ret = false; @@ -889,7 +889,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv, " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { /* fails if orig_node has no router */ if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) continue; @@ -1009,7 +1009,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, int idx = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { if (idx++ < idx_skip) continue; diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index ee08540ce503..54bdd415e8df 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -492,20 +492,21 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - bool ret; + bool res; + int ret = NET_RX_DROP; - ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN); - if (!ret) - return NET_RX_DROP; + res = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN); + if (!res) + goto free_skb; if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - return NET_RX_DROP; + goto free_skb; /* did we receive a B.A.T.M.A.N. V ELP packet on an interface * that does not have B.A.T.M.A.N. V ELP enabled ? */ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) - return NET_RX_DROP; + goto free_skb; elp_packet = (struct batadv_elp_packet *)skb->data; @@ -516,14 +517,19 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out; + goto free_skb; batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming, elp_packet); -out: - if (primary_if) - batadv_hardif_put(primary_if); - consume_skb(skb); - return NET_RX_SUCCESS; + ret = NET_RX_SUCCESS; + batadv_hardif_put(primary_if); + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); + + return ret; } diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 1aeeadca620c..38b9aab83fc0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -140,6 +140,7 @@ static void batadv_v_ogm_send(struct work_struct *work) unsigned char *ogm_buff, *pkt_buff; int ogm_buff_len; u16 tvlv_len = 0; + int ret; bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); bat_priv = container_of(bat_v, struct batadv_priv, bat_v); @@ -182,6 +183,31 @@ static void batadv_v_ogm_send(struct work_struct *work) if (!kref_get_unless_zero(&hard_iface->refcount)) continue; + ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL); + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n", + hard_iface->net_dev->name, type); + + batadv_hardif_put(hard_iface); + continue; + } + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", ogm_packet->orig, ntohl(ogm_packet->seqno), @@ -401,7 +427,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_orig_ifinfo *orig_ifinfo = NULL; + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; bool protection_started = false; int ret = -EINVAL; @@ -486,7 +512,7 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *router = NULL; - struct batadv_orig_node *orig_neigh_node = NULL; + struct batadv_orig_node *orig_neigh_node; struct batadv_neigh_node *orig_neigh_router = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL; u32 router_throughput, neigh_throughput; @@ -651,6 +677,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; u32 ogm_throughput, link_throughput, path_throughput; + int ret; ethhdr = eth_hdr(skb); ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset); @@ -716,6 +743,35 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (!kref_get_unless_zero(&hard_iface->refcount)) continue; + ret = batadv_hardif_no_broadcast(hard_iface, + ogm_packet->orig, + hardif_neigh->orig); + + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n", + ogm_packet->orig, hard_iface->net_dev->name, + type); + + batadv_hardif_put(hard_iface); + continue; + } + batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node, neigh_node, if_incoming, hard_iface); @@ -754,18 +810,18 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, * B.A.T.M.A.N. V enabled ? */ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) - return NET_RX_DROP; + goto free_skb; if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) - return NET_RX_DROP; + goto free_skb; if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - return NET_RX_DROP; + goto free_skb; ogm_packet = (struct batadv_ogm2_packet *)skb->data; if (batadv_is_my_mac(bat_priv, ogm_packet->orig)) - return NET_RX_DROP; + goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, @@ -786,7 +842,12 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, } ret = NET_RX_SUCCESS; - consume_skb(skb); + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); return ret; } diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index b4ffba7dd583..77925504379d 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -186,7 +186,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \ /* the following attributes are general and therefore they will be directly * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs */ -static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open); +static BATADV_DEBUGINFO(routing_algos, 0444, batadv_algorithms_open); static struct batadv_debuginfo *batadv_general_debuginfos[] = { &batadv_debuginfo_routing_algos, @@ -194,26 +194,24 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = { }; /* The following attributes are per soft interface */ -static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open); -static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open); -static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open); -static BATADV_DEBUGINFO(transtable_global, S_IRUGO, - batadv_transtable_global_open); +static BATADV_DEBUGINFO(neighbors, 0444, neighbors_open); +static BATADV_DEBUGINFO(originators, 0444, batadv_originators_open); +static BATADV_DEBUGINFO(gateways, 0444, batadv_gateways_open); +static BATADV_DEBUGINFO(transtable_global, 0444, batadv_transtable_global_open); #ifdef CONFIG_BATMAN_ADV_BLA -static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); -static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, +static BATADV_DEBUGINFO(bla_claim_table, 0444, batadv_bla_claim_table_open); +static BATADV_DEBUGINFO(bla_backbone_table, 0444, batadv_bla_backbone_table_open); #endif #ifdef CONFIG_BATMAN_ADV_DAT -static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); +static BATADV_DEBUGINFO(dat_cache, 0444, batadv_dat_cache_open); #endif -static BATADV_DEBUGINFO(transtable_local, S_IRUGO, - batadv_transtable_local_open); +static BATADV_DEBUGINFO(transtable_local, 0444, batadv_transtable_local_open); #ifdef CONFIG_BATMAN_ADV_NC -static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); +static BATADV_DEBUGINFO(nc_nodes, 0444, batadv_nc_nodes_open); #endif #ifdef CONFIG_BATMAN_ADV_MCAST -static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open); +static BATADV_DEBUGINFO(mcast_flags, 0444, batadv_mcast_flags_open); #endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { @@ -253,7 +251,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ }, \ } -static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, +static BATADV_HARDIF_DEBUGINFO(originators, 0444, batadv_originators_hardif_open); static struct batadv_debuginfo *batadv_hardif_debuginfos[] = { diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e257efdc5d03..49576c5a3fe3 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -369,12 +369,11 @@ out: * batadv_dbg_arp - print a debug message containing all the ARP packet details * @bat_priv: the bat priv with all the soft interface information * @skb: ARP packet - * @type: ARP type * @hdr_size: size of the possible header before the ARP packet * @msg: message to print together with the debugging information */ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, - u16 type, int hdr_size, char *msg) + int hdr_size, char *msg) { struct batadv_unicast_4addr_packet *unicast_4addr_packet; struct batadv_bcast_packet *bcast_pkt; @@ -441,7 +440,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, #else static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, - u16 type, int hdr_size, char *msg) + int hdr_size, char *msg) { } @@ -950,6 +949,41 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) } /** + * batadv_dat_arp_create_reply - create an ARP Reply + * @bat_priv: the bat priv with all the soft interface information + * @ip_src: ARP sender IP + * @ip_dst: ARP target IP + * @hw_src: Ethernet source and ARP sender MAC + * @hw_dst: Ethernet destination and ARP target MAC + * @vid: VLAN identifier (optional, set to zero otherwise) + * + * Creates an ARP Reply from the given values, optionally encapsulated in a + * VLAN header. + * + * Return: An skb containing an ARP Reply. + */ +static struct sk_buff * +batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, + __be32 ip_dst, u8 *hw_src, u8 *hw_dst, + unsigned short vid) +{ + struct sk_buff *skb; + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface, + ip_src, hw_dst, hw_src, hw_dst); + if (!skb) + return NULL; + + skb_reset_mac_header(skb); + + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); + + return skb; +} + +/** * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to * answer using DAT * @bat_priv: the bat priv with all the soft interface information @@ -983,8 +1017,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, if (type != ARPOP_REQUEST) goto out; - batadv_dbg_arp(bat_priv, skb, type, hdr_size, - "Parsing outgoing ARP REQUEST"); + batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REQUEST"); ip_src = batadv_arp_ip_src(skb, hdr_size); hw_src = batadv_arp_hw_src(skb, hdr_size); @@ -1007,20 +1040,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, goto out; } - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - bat_priv->soft_iface, ip_dst, hw_src, - dat_entry->mac_addr, hw_src); + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, + dat_entry->mac_addr, + hw_src, vid); if (!skb_new) goto out; - if (vid & BATADV_VLAN_HAS_TAG) { - skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), - vid & VLAN_VID_MASK); - if (!skb_new) - goto out; - } - - skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, bat_priv->soft_iface); bat_priv->stats.rx_packets++; @@ -1075,8 +1100,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, ip_src = batadv_arp_ip_src(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); - batadv_dbg_arp(bat_priv, skb, type, hdr_size, - "Parsing incoming ARP REQUEST"); + batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REQUEST"); batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); @@ -1084,25 +1108,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!dat_entry) goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - bat_priv->soft_iface, ip_dst, hw_src, - dat_entry->mac_addr, hw_src); - + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, + dat_entry->mac_addr, hw_src, vid); if (!skb_new) goto out; - /* the rest of the TX path assumes that the mac_header offset pointing - * to the inner Ethernet header has been set, therefore reset it now. - */ - skb_reset_mac_header(skb_new); - - if (vid & BATADV_VLAN_HAS_TAG) { - skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), - vid & VLAN_VID_MASK); - if (!skb_new) - goto out; - } - /* To preserve backwards compatibility, the node has choose the outgoing * format based on the incoming request packet type. The assumption is * that a node not using the 4addr packet format doesn't support it. @@ -1149,8 +1159,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, if (type != ARPOP_REPLY) return; - batadv_dbg_arp(bat_priv, skb, type, hdr_size, - "Parsing outgoing ARP REPLY"); + batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REPLY"); hw_src = batadv_arp_hw_src(skb, hdr_size); ip_src = batadv_arp_ip_src(skb, hdr_size); @@ -1195,8 +1204,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, if (type != ARPOP_REPLY) goto out; - batadv_dbg_arp(bat_priv, skb, type, hdr_size, - "Parsing incoming ARP REPLY"); + batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REPLY"); hw_src = batadv_arp_hw_src(skb, hdr_size); ip_src = batadv_arp_ip_src(skb, hdr_size); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0934730fb7ff..9c561e683f4b 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> @@ -42,17 +43,23 @@ /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. + * @dropped: whether the chain is cleared because all fragments are dropped * * Free fragments in the passed hlist. Should be called with appropriate lock. */ -static void batadv_frag_clear_chain(struct hlist_head *head) +static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped) { struct batadv_frag_list_entry *entry; struct hlist_node *node; hlist_for_each_entry_safe(entry, node, head, list) { hlist_del(&entry->list); - kfree_skb(entry->skb); + + if (dropped) + kfree_skb(entry->skb); + else + consume_skb(entry->skb); + kfree(entry); } } @@ -73,7 +80,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, spin_lock_bh(&chain->lock); if (!check_cb || check_cb(chain)) { - batadv_frag_clear_chain(&chain->head); + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; } @@ -117,8 +124,8 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, if (chain->seqno == seqno) return false; - if (!hlist_empty(&chain->head)) - batadv_frag_clear_chain(&chain->head); + if (!hlist_empty(&chain->fragment_list)) + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; chain->seqno = seqno; @@ -176,7 +183,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, chain = &orig_node->fragments[bucket]; spin_lock_bh(&chain->lock); if (batadv_frag_init_chain(chain, seqno)) { - hlist_add_head(&frag_entry_new->list, &chain->head); + hlist_add_head(&frag_entry_new->list, &chain->fragment_list); chain->size = skb->len - hdr_size; chain->timestamp = jiffies; chain->total_size = ntohs(frag_packet->total_size); @@ -185,7 +192,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, } /* Find the position for the new fragment. */ - hlist_for_each_entry(frag_entry_curr, &chain->head, list) { + hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) { /* Drop packet if fragment already exists. */ if (frag_entry_curr->no == frag_entry_new->no) goto err_unlock; @@ -220,11 +227,11 @@ out: * exceeds the maximum size of one merged packet. Don't allow * packets to have different total_size. */ - batadv_frag_clear_chain(&chain->head); + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; } else if (ntohs(frag_packet->total_size) == chain->size) { /* All fragments received. Hand over chain to caller. */ - hlist_move_list(&chain->head, chain_out); + hlist_move_list(&chain->fragment_list, chain_out); chain->size = 0; } @@ -252,8 +259,9 @@ batadv_frag_merge_packets(struct hlist_head *chain) { struct batadv_frag_packet *packet; struct batadv_frag_list_entry *entry; - struct sk_buff *skb_out = NULL; + struct sk_buff *skb_out; int size, hdr_size = sizeof(struct batadv_frag_packet); + bool dropped = false; /* Remove first entry, as this is the destination for the rest of the * fragments. @@ -270,6 +278,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { kfree_skb(skb_out); skb_out = NULL; + dropped = true; goto free; } @@ -291,7 +300,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) free: /* Locking is not needed, because 'chain' is not part of any orig. */ - batadv_frag_clear_chain(chain); + batadv_frag_clear_chain(chain, dropped); return skb_out; } @@ -352,7 +361,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_orig_node *orig_node_dst = NULL; + struct batadv_orig_node *orig_node_dst; struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; @@ -433,8 +442,7 @@ err: * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments * - * Return: the netdev tx status or -1 in case of error. - * When -1 is returned the skb is not consumed. + * Return: the netdev tx status or a negative errno code on a failure */ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -447,7 +455,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, max_packet_size; - int ret = -1; + int ret; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -457,13 +465,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; /* Don't even try to fragment, if we need more than 16 fragments */ - if (skb->len > max_packet_size) - goto out; + if (skb->len > max_packet_size) { + ret = -EAGAIN; + goto free_skb; + } bat_priv = orig_node->bat_priv; primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + if (!primary_if) { + ret = -EINVAL; + goto put_primary_if; + } /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; @@ -488,34 +500,35 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { skb_fragment = batadv_frag_create(skb, &frag_header, mtu); - if (!skb_fragment) - goto out; + if (!skb_fragment) { + ret = -ENOMEM; + goto free_skb; + } batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb_fragment->len + ETH_HLEN); ret = batadv_send_unicast_skb(skb_fragment, neigh_node); if (ret != NET_XMIT_SUCCESS) { - /* return -1 so that the caller can free the original - * skb - */ - ret = -1; - goto out; + ret = NET_XMIT_DROP; + goto free_skb; } frag_header.no++; /* The initial check in this function should cover this case */ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { - ret = -1; - goto out; + ret = -EINVAL; + goto free_skb; } } /* Make room for the fragment header. */ if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) - goto out; + pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { + ret = -ENOMEM; + goto free_skb; + } memcpy(skb->data, &frag_header, header_size); @@ -524,10 +537,13 @@ int batadv_frag_send_packet(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb->len + ETH_HLEN); ret = batadv_send_unicast_skb(skb, neigh_node); + /* skb was consumed */ + skb = NULL; -out: - if (primary_if) - batadv_hardif_put(primary_if); +put_primary_if: + batadv_hardif_put(primary_if); +free_skb: + kfree_skb(skb); return ret; } diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 3202fe329e63..b95f619606af 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -47,7 +47,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, static inline bool batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) { - if (!hlist_empty(&frags_entry->head) && + if (!hlist_empty(&frags_entry->fragment_list) && batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) return true; return false; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index de055d64debe..52b8bd6ec431 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -348,7 +348,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); - hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); + hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list); spin_unlock_bh(&bat_priv->gw.list_lock); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -376,7 +376,8 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.gateway_list, + list) { if (gw_node_tmp->orig_node != orig_node) continue; @@ -475,7 +476,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_lock_bh(&bat_priv->gw.list_lock); hlist_for_each_entry_safe(gw_node, node_tmp, - &bat_priv->gw.list, list) { + &bat_priv->gw.gateway_list, list) { hlist_del_init_rcu(&gw_node->list); batadv_gw_node_put(gw_node); } @@ -704,7 +705,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, { struct batadv_neigh_node *neigh_curr = NULL; struct batadv_neigh_node *neigh_old = NULL; - struct batadv_orig_node *orig_dst_node = NULL; + struct batadv_orig_node *orig_dst_node; struct batadv_gw_node *gw_node = NULL; struct batadv_gw_node *curr_gw = NULL; struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e034afbd1bb0..dc1816e9d53b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -228,6 +228,58 @@ bool batadv_is_wifi_netdev(struct net_device *net_device) return false; } +/** + * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary + * @if_outgoing: the outgoing interface checked and considered for (re)broadcast + * @orig_addr: the originator of this packet + * @orig_neigh: originator address of the forwarder we just got the packet from + * (NULL if we originated) + * + * Checks whether a packet needs to be (re)broadcasted on the given interface. + * + * Return: + * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface + * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder + * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator + * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast + */ +int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, + u8 *orig_addr, u8 *orig_neigh) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + struct hlist_node *first; + int ret = BATADV_HARDIF_BCAST_OK; + + rcu_read_lock(); + + /* 0 neighbors -> no (re)broadcast */ + first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list)); + if (!first) { + ret = BATADV_HARDIF_BCAST_NORECIPIENT; + goto out; + } + + /* >1 neighbors -> (re)brodcast */ + if (rcu_dereference(hlist_next_rcu(first))) + goto out; + + hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node, + list); + + /* 1 neighbor, is the originator -> no rebroadcast */ + if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) { + ret = BATADV_HARDIF_BCAST_DUPORIG; + /* 1 neighbor, is the one we received from -> no rebroadcast */ + } else if (orig_neigh && + batadv_compare_eth(hardif_neigh->orig, orig_neigh)) { + ret = BATADV_HARDIF_BCAST_DUPFWD; + } + +out: + rcu_read_unlock(); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index a76724d369bf..a043182586e9 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -40,6 +40,20 @@ enum batadv_hard_if_state { }; /** + * enum batadv_hard_if_bcast - broadcast avoidance options + * @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface + * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient + * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from + * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator + */ +enum batadv_hard_if_bcast { + BATADV_HARDIF_BCAST_OK = 0, + BATADV_HARDIF_BCAST_NORECIPIENT, + BATADV_HARDIF_BCAST_DUPFWD, + BATADV_HARDIF_BCAST_DUPORIG, +}; + +/** * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed @@ -63,6 +77,8 @@ void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_release(struct kref *ref); +int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, + u8 *orig_addr, u8 *orig_neigh); /** * batadv_hardif_put - decrement the hard interface refcounter and possibly diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index cbbf87075f06..557a7044cfbc 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -61,36 +61,6 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, /* free only the hashtable and the hash itself. */ void batadv_hash_destroy(struct batadv_hashtable *hash); -/* remove the hash structure. if hashdata_free_cb != NULL, this function will be - * called to remove the elements inside of the hash. if you don't remove the - * elements, memory might be leaked. - */ -static inline void batadv_hash_delete(struct batadv_hashtable *hash, - batadv_hashdata_free_cb free_cb, - void *arg) -{ - struct hlist_head *head; - struct hlist_node *node, *node_tmp; - spinlock_t *list_lock; /* spinlock to protect write access */ - u32 i; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - list_lock = &hash->list_locks[i]; - - spin_lock_bh(list_lock); - hlist_for_each_safe(node, node_tmp, head) { - hlist_del_rcu(node); - - if (free_cb) - free_cb(node, arg); - } - spin_unlock_bh(list_lock); - } - - batadv_hash_destroy(hash); -} - /** * batadv_hash_add - adds data to the hashtable * @hash: storage hash table diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 378cc1119d66..b310f381ae02 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -38,7 +38,6 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/stat.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/uaccess.h> @@ -322,8 +321,8 @@ int batadv_socket_setup(struct batadv_priv *bat_priv) if (!bat_priv->debug_dir) goto err; - d = debugfs_create_file(BATADV_ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR, - bat_priv->debug_dir, bat_priv, &batadv_fops); + d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir, + bat_priv, &batadv_fops); if (!d) goto err; diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 56dc532f7a2c..c73c31769aba 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -31,7 +31,6 @@ #include <linux/sched.h> /* for linux/wait.h */ #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/stat.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/uaccess.h> @@ -212,8 +211,7 @@ int batadv_debug_log_setup(struct batadv_priv *bat_priv) spin_lock_init(&bat_priv->debug_log->lock); init_waitqueue_head(&bat_priv->debug_log->queue_wait); - d = debugfs_create_file("log", S_IFREG | S_IRUSR, - bat_priv->debug_dir, bat_priv, + d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv, &batadv_log_fops); if (!d) goto err; diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index d2905a855d1b..3284a7b0325d 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -71,12 +71,12 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); /* possibly ratelimited debug output */ -#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ - do { \ - if (atomic_read(&bat_priv->log_level) & type && \ - (!ratelimited || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg);\ - } \ +#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ + do { \ + if (atomic_read(&(bat_priv)->log_level) & (type) && \ + (!(ratelimited) || net_ratelimit())) \ + batadv_debug_log(bat_priv, fmt, ## arg); \ + } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ __printf(4, 5) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 2c017ab47557..6b5dae6f0307 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -160,7 +160,7 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); - INIT_HLIST_HEAD(&bat_priv->gw.list); + INIT_HLIST_HEAD(&bat_priv->gw.gateway_list); #ifdef CONFIG_BATMAN_ADV_MCAST INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list); INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list); @@ -402,6 +402,8 @@ void batadv_skb_set_priority(struct sk_buff *skb, int offset) static int batadv_recv_unhandled_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { + kfree_skb(skb); + return NET_RX_DROP; } @@ -416,7 +418,6 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *hard_iface; u8 idx; - int ret; hard_iface = container_of(ptype, struct batadv_hard_iface, batman_adv_ptype); @@ -466,14 +467,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); - /* all receive handlers return whether they received or reused - * the supplied skb. if not, we have to free the skb. - */ idx = batadv_ogm_packet->packet_type; - ret = (*batadv_rx_handler[idx])(skb, hard_iface); - - if (ret == NET_RX_DROP) - kfree_skb(skb); + (*batadv_rx_handler[idx])(skb, hard_iface); batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 09af21e27639..a6cc8040a21d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.4" +#define BATADV_SOURCE_VERSION "2016.5" #endif /* B.A.T.M.A.N. parameters */ @@ -48,6 +48,7 @@ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ #define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ +#define BATADV_MCAST_WORK_PERIOD 500 /* 0.5 seconds */ #define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) @@ -185,7 +186,6 @@ enum batadv_uev_type { #include <linux/bitops.h> /* for packet.h */ #include <linux/compiler.h> -#include <linux/cpumask.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> /* for packet.h */ #include <linux/if_vlan.h> @@ -200,8 +200,8 @@ struct packet_type; struct seq_file; struct sk_buff; -#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \ - (int)(vid & VLAN_VID_MASK) : -1) +#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \ + (int)((vid) & VLAN_VID_MASK) : -1) extern struct list_head batadv_hardif_list; @@ -284,26 +284,6 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) -/** - * batadv_sum_counter - Sum the cpu-local counters for index 'idx' - * @bat_priv: the bat priv with all the soft interface information - * @idx: index of counter to sum up - * - * Return: sum of all cpu-local counters - */ -static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) -{ - u64 *counters, sum = 0; - int cpu; - - for_each_possible_cpu(cpu) { - counters = per_cpu_ptr(bat_priv->bat_counters, cpu); - sum += counters[idx]; - } - - return sum; -} - /* Define a macro to reach the control buffer of the skb. The members of the * control buffer are defined in struct batadv_skb_cb in types.h. * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 13661f43386f..090a69fc342e 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -33,6 +33,7 @@ #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> @@ -48,6 +49,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/workqueue.h> #include <net/addrconf.h> #include <net/if_inet6.h> #include <net/ip.h> @@ -60,6 +62,18 @@ #include "translation-table.h" #include "tvlv.h" +static void batadv_mcast_mla_update(struct work_struct *work); + +/** + * batadv_mcast_start_timer - schedule the multicast periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work, + msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD)); +} + /** * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists * @soft_iface: netdev struct of the mesh interface @@ -231,19 +245,15 @@ out: /** * batadv_mcast_mla_list_free - free a list of multicast addresses - * @bat_priv: the bat priv with all the soft interface information * @mcast_list: the list to free * * Removes and frees all items in the given mcast_list. */ -static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, - struct hlist_head *mcast_list) +static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); - hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { hlist_del(&mcast_entry->list); kfree(mcast_entry); @@ -259,6 +269,8 @@ static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, * translation table except the ones listed in the given mcast_list. * * If mcast_list is NULL then all are retracted. + * + * Do not call outside of the mcast worker! (or cancel mcast worker first) */ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -266,7 +278,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); + WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { @@ -291,6 +303,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, * * Adds multicast listener announcements from the given mcast_list to the * translation table if they have not been added yet. + * + * Do not call outside of the mcast worker! (or cancel mcast worker first) */ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -298,7 +312,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); + WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); if (!mcast_list) return; @@ -532,13 +546,18 @@ update: } /** - * batadv_mcast_mla_update - update the own MLAs + * __batadv_mcast_mla_update - update the own MLAs * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. + * + * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list + * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are + * ensured by the non-parallel execution of the worker this function + * belongs to. */ -void batadv_mcast_mla_update(struct batadv_priv *bat_priv) +static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { struct net_device *soft_iface = bat_priv->soft_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; @@ -560,7 +579,30 @@ update: batadv_mcast_mla_tt_add(bat_priv, &mcast_list); out: - batadv_mcast_mla_list_free(bat_priv, &mcast_list); + batadv_mcast_mla_list_free(&mcast_list); +} + +/** + * batadv_mcast_mla_update - update the own MLAs + * @work: kernel work struct + * + * Updates the own multicast listener announcements in the translation + * table as well as the own, announced multicast tvlv container. + * + * In the end, reschedules the work timer. + */ +static void batadv_mcast_mla_update(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_mcast *priv_mcast; + struct batadv_priv *bat_priv; + + delayed_work = to_delayed_work(work); + priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work); + bat_priv = container_of(priv_mcast, struct batadv_priv, mcast); + + __batadv_mcast_mla_update(bat_priv); + batadv_mcast_start_timer(bat_priv); } /** @@ -1132,6 +1174,9 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + + INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); + batadv_mcast_start_timer(bat_priv); } #ifdef CONFIG_BATMAN_ADV_DEBUGFS @@ -1243,12 +1288,13 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) */ void batadv_mcast_free(struct batadv_priv *bat_priv) { + cancel_delayed_work_sync(&bat_priv->mcast.work); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); - spin_lock_bh(&bat_priv->tt.commit_lock); + /* safely calling outside of worker, as worker was canceled above */ batadv_mcast_mla_tt_retract(bat_priv, NULL); - spin_unlock_bh(&bat_priv->tt.commit_lock); } /** diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 1fb00ba84907..2cddaf52a21d 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -39,8 +39,6 @@ enum batadv_forw_mode { #ifdef CONFIG_BATMAN_ADV_MCAST -void batadv_mcast_mla_update(struct batadv_priv *bat_priv); - enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); @@ -55,10 +53,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); #else -static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) -{ -} - static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 64cb6acbe0a6..062738163bdc 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -20,11 +20,14 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/cache.h> #include <linux/errno.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/genetlink.h> #include <linux/if_ether.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> @@ -48,14 +51,7 @@ #include "tp_meter.h" #include "translation-table.h" -struct genl_family batadv_netlink_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = BATADV_NL_NAME, - .version = 1, - .maxattr = BATADV_ATTR_MAX, - .netnsok = true, -}; +struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { @@ -534,7 +530,7 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } -static struct genl_ops batadv_netlink_ops[] = { +static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, .flags = GENL_ADMIN_PERM, @@ -610,6 +606,19 @@ static struct genl_ops batadv_netlink_ops[] = { }; +struct genl_family batadv_netlink_family __ro_after_init = { + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = batadv_netlink_ops, + .n_ops = ARRAY_SIZE(batadv_netlink_ops), + .mcgrps = batadv_netlink_mcgrps, + .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), +}; + /** * batadv_netlink_register - register batadv genl netlink family */ @@ -617,9 +626,7 @@ void __init batadv_netlink_register(void) { int ret; - ret = genl_register_family_with_ops_groups(&batadv_netlink_family, - batadv_netlink_ops, - batadv_netlink_mcgrps); + ret = genl_register_family(&batadv_netlink_family); if (ret) pr_warn("unable to register netlink family"); } diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e3baf697a35c..ab5a3bf0765f 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -44,7 +44,6 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/stat.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> @@ -261,10 +260,16 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path) /** * batadv_nc_packet_free - frees nc packet * @nc_packet: the nc packet to free + * @dropped: whether the packet is freed because is is dropped */ -static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, + bool dropped) { - kfree_skb(nc_packet->skb); + if (dropped) + kfree_skb(nc_packet->skb); + else + consume_skb(nc_packet->skb); + batadv_nc_path_put(nc_packet->nc_path); kfree(nc_packet); } @@ -577,7 +582,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) { batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node); nc_packet->skb = NULL; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); } /** @@ -611,7 +616,7 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, /* purge nc packet */ list_del(&nc_packet->list); - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, true); res = true; @@ -1209,11 +1214,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, } /* skb_src is now coded into skb_dest, so free it */ - kfree_skb(skb_src); + consume_skb(skb_src); /* avoid duplicate free of skb from nc_packet */ nc_packet->skb = NULL; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); /* Send the coded packet and return true */ batadv_send_unicast_skb(skb_dest, first_dest); @@ -1400,7 +1405,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free * our ref */ - kfree_skb(skb); + consume_skb(skb); } /** @@ -1724,7 +1729,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ether_addr_copy(unicast_packet->dest, orig_dest); unicast_packet->ttvn = ttvn; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); return unicast_packet; } @@ -1814,11 +1819,11 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) - return NET_RX_DROP; + goto free_skb; /* Make sure we can access (and remove) header */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto free_skb; coded_packet = (struct batadv_coded_packet *)skb->data; ethhdr = eth_hdr(skb); @@ -1826,7 +1831,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) - return NET_RX_DROP; + goto free_skb; /* Update stat counter */ if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) @@ -1836,7 +1841,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, coded_packet); if (!nc_packet) { batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); - return NET_RX_DROP; + goto free_skb; } /* Make skb's linear, because decoding accesses the entire buffer */ @@ -1861,7 +1866,10 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return batadv_recv_unicast_packet(skb, recv_if); free_nc_packet: - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, true); +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } @@ -1961,17 +1969,16 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) if (!nc_dir) goto out; - file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, - &bat_priv->nc.min_tq); + file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq); if (!file) goto out; - file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, + file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir, &bat_priv->nc.max_fwd_delay); if (!file) goto out; - file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, + file = debugfs_create_u32("max_buffer_time", 0644, nc_dir, &bat_priv->nc.max_buffer_time); if (!file) goto out; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7c8d16086f0f..8f3b2969cc4e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -364,7 +364,7 @@ struct batadv_orig_ifinfo * batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { - struct batadv_orig_ifinfo *orig_ifinfo = NULL; + struct batadv_orig_ifinfo *orig_ifinfo; unsigned long reset_time; spin_lock_bh(&orig_node->neigh_list_lock); @@ -512,15 +512,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, * batadv_hardif_neigh_create - create a hardif neighbour node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve + * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) + const u8 *neigh_addr, + struct batadv_orig_node *orig_node) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hardif_neigh_node *hardif_neigh = NULL; + struct batadv_hardif_neigh_node *hardif_neigh; spin_lock_bh(&hard_iface->neigh_list_lock); @@ -536,6 +538,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); INIT_HLIST_NODE(&hardif_neigh->list); ether_addr_copy(hardif_neigh->addr, neigh_addr); + ether_addr_copy(hardif_neigh->orig, orig_node->orig); hardif_neigh->if_incoming = hard_iface; hardif_neigh->last_seen = jiffies; @@ -556,21 +559,23 @@ out: * node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve + * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) + const u8 *neigh_addr, + struct batadv_orig_node *orig_node) { - struct batadv_hardif_neigh_node *hardif_neigh = NULL; + struct batadv_hardif_neigh_node *hardif_neigh; /* first check without locking to avoid the overhead */ hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); if (hardif_neigh) return hardif_neigh; - return batadv_hardif_neigh_create(hard_iface, neigh_addr); + return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node); } /** @@ -630,7 +635,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, goto out; hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, - neigh_addr); + neigh_addr, orig_node); if (!hardif_neigh) goto out; @@ -683,7 +688,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { - struct batadv_neigh_node *neigh_node = NULL; + struct batadv_neigh_node *neigh_node; /* first check without locking to avoid the overhead */ neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); @@ -1021,7 +1026,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, batadv_orig_node_vlan_put(vlan); for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { - INIT_HLIST_HEAD(&orig_node->fragments[i].head); + INIT_HLIST_HEAD(&orig_node->fragments[i].fragment_list); spin_lock_init(&orig_node->fragments[i].lock); orig_node->fragments[i].size = 0; } diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 6afc0b86950e..7a36bcfa0ba0 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -21,7 +21,7 @@ #include <asm/byteorder.h> #include <linux/types.h> -#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0) +#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0) /** * enum batadv_packettype - types for batman-adv encapsulated packets @@ -252,16 +252,6 @@ struct batadv_elp_packet { #define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) /** - * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes - * @BATADV_TP_START: start a throughput meter run - * @BATADV_TP_STOP: stop a throughput meter run - */ -enum batadv_icmp_user_cmd_type { - BATADV_TP_START = 0, - BATADV_TP_STOP = 2, -}; - -/** * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7e8dc648b95a..6713bdf414cd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -196,8 +196,8 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (!is_broadcast_ether_addr(ethhdr->h_dest)) return false; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + /* packet with invalid sender address */ + if (!is_valid_ether_addr(ethhdr->h_source)) return false; /* create a copy of the skb, if needed, to modify it. */ @@ -262,11 +262,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res == -1) - goto out; - - ret = NET_RX_SUCCESS; + if (res == NET_XMIT_SUCCESS) + ret = NET_RX_SUCCESS; + /* skb was consumed */ + skb = NULL; break; case BATADV_TP: if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet))) @@ -274,6 +274,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, batadv_tp_meter_recv(bat_priv, skb); ret = NET_RX_SUCCESS; + /* skb was consumed */ + skb = NULL; goto out; default: /* drop unknown type */ @@ -284,6 +286,9 @@ out: batadv_hardif_put(primary_if); if (orig_node) batadv_orig_node_put(orig_node); + + kfree_skb(skb); + return ret; } @@ -325,14 +330,20 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != -1) - ret = NET_RX_SUCCESS; + if (res == NET_RX_SUCCESS) + ret = NET_XMIT_SUCCESS; + + /* skb was consumed */ + skb = NULL; out: if (primary_if) batadv_hardif_put(primary_if); if (orig_node) batadv_orig_node_put(orig_node); + + kfree_skb(skb); + return ret; } @@ -349,21 +360,21 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; + /* packet with unicast indication but non-unicast recipient */ + if (!is_valid_ether_addr(ethhdr->h_dest)) + goto free_skb; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - goto out; + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) + goto free_skb; /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) - goto out; + goto free_skb; icmph = (struct batadv_icmp_header *)skb->data; @@ -372,17 +383,17 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph->msg_type == BATADV_ECHO_REQUEST) && (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { if (skb_linearize(skb) < 0) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) - goto out; + goto free_skb; ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur], ethhdr->h_dest); @@ -400,11 +411,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* get routing information */ orig_node = batadv_orig_hash_find(bat_priv, icmph->dst); if (!orig_node) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto put_orig_node; icmph = (struct batadv_icmp_header *)skb->data; @@ -413,12 +424,18 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* route it */ res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - if (res != -1) + if (res == NET_XMIT_SUCCESS) ret = NET_RX_SUCCESS; -out: + /* skb was consumed */ + skb = NULL; + +put_orig_node: if (orig_node) batadv_orig_node_put(orig_node); +free_skb: + kfree_skb(skb); + return ret; } @@ -445,12 +462,12 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, ethhdr = eth_hdr(skb); - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) + /* packet with unicast indication but non-unicast recipient */ + if (!is_valid_ether_addr(ethhdr->h_dest)) return -EBADR; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) return -EBADR; /* not for me */ @@ -667,18 +684,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); - goto out; + goto free_skb; } /* get routing information */ orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); if (!orig_node) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto put_orig_node; /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -702,8 +719,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - if (res == -1) - goto out; + if (res == NET_XMIT_SUCCESS) + ret = NET_RX_SUCCESS; + + /* skb was consumed */ + skb = NULL; /* translate transmit result into receive result */ if (res == NET_XMIT_SUCCESS) { @@ -713,11 +733,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len + ETH_HLEN); } - ret = NET_RX_SUCCESS; +put_orig_node: + batadv_orig_node_put(orig_node); +free_skb: + kfree_skb(skb); -out: - if (orig_node) - batadv_orig_node_put(orig_node); return ret; } @@ -902,14 +922,18 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); if (check < 0) - return NET_RX_DROP; + goto free_skb; /* we don't know about this type, drop it. */ unicast_packet = (struct batadv_unicast_packet *)skb->data; if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) - return NET_RX_DROP; + goto free_skb; return batadv_route_unicast_packet(skb, recv_if); + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } int batadv_recv_unicast_packet(struct sk_buff *skb, @@ -923,6 +947,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; bool is4addr; + int ret = NET_RX_DROP; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; @@ -942,9 +967,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); if (check < 0) - return NET_RX_DROP; + goto free_skb; if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) - return NET_RX_DROP; + goto free_skb; /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { @@ -982,7 +1007,14 @@ rx_success: return NET_RX_SUCCESS; } - return batadv_route_unicast_packet(skb, recv_if); + ret = batadv_route_unicast_packet(skb, recv_if); + /* skb was consumed */ + skb = NULL; + +free_skb: + kfree_skb(skb); + + return ret; } /** @@ -1004,15 +1036,15 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, int ret = NET_RX_DROP; if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) - return NET_RX_DROP; + goto free_skb; /* the header is likely to be modified while forwarding */ if (skb_cow(skb, hdr_size) < 0) - return NET_RX_DROP; + goto free_skb; /* packet needs to be linearized to access the tvlv content */ if (skb_linearize(skb) < 0) - return NET_RX_DROP; + goto free_skb; unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data; @@ -1020,17 +1052,21 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len); if (tvlv_buff_len > skb->len - hdr_size) - return NET_RX_DROP; + goto free_skb; ret = batadv_tvlv_containers_process(bat_priv, false, NULL, unicast_tvlv_packet->src, unicast_tvlv_packet->dst, tvlv_buff, tvlv_buff_len); - if (ret != NET_RX_SUCCESS) + if (ret != NET_RX_SUCCESS) { ret = batadv_route_unicast_packet(skb, recv_if); - else - consume_skb(skb); + /* skb was consumed */ + skb = NULL; + } + +free_skb: + kfree_skb(skb); return ret; } @@ -1056,20 +1092,22 @@ int batadv_recv_frag_packet(struct sk_buff *skb, if (batadv_check_unicast_packet(bat_priv, skb, sizeof(*frag_packet)) < 0) - goto out; + goto free_skb; frag_packet = (struct batadv_frag_packet *)skb->data; orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig); if (!orig_node_src) - goto out; + goto free_skb; skb->priority = frag_packet->priority + 256; /* Route the fragment if it is not for us and too big to be merged. */ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { + /* skb was consumed */ + skb = NULL; ret = NET_RX_SUCCESS; - goto out; + goto put_orig_node; } batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX); @@ -1077,20 +1115,24 @@ int batadv_recv_frag_packet(struct sk_buff *skb, /* Add fragment to buffer and merge if possible. */ if (!batadv_frag_skb_buffer(&skb, orig_node_src)) - goto out; + goto put_orig_node; /* Deliver merged packet to the appropriate handler, if it was * merged */ - if (skb) + if (skb) { batadv_batman_skb_recv(skb, recv_if->net_dev, &recv_if->batman_adv_ptype, NULL); + /* skb was consumed */ + skb = NULL; + } ret = NET_RX_SUCCESS; -out: - if (orig_node_src) - batadv_orig_node_put(orig_node_src); +put_orig_node: + batadv_orig_node_put(orig_node_src); +free_skb: + kfree_skb(skb); return ret; } @@ -1109,35 +1151,35 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; + goto free_skb; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - goto out; + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) + goto free_skb; /* ignore broadcasts sent by myself */ if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - goto out; + goto free_skb; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) - goto out; + goto free_skb; if (bcast_packet->ttl < 2) - goto out; + goto free_skb; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); if (!orig_node) - goto out; + goto free_skb; spin_lock_bh(&orig_node->bcast_seqno_lock); @@ -1165,18 +1207,18 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, /* check whether this has been sent by another originator before */ if (batadv_bla_check_bcast_duplist(bat_priv, skb)) - goto out; + goto free_skb; batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet)); /* rebroadcast packet */ - batadv_add_bcast_packet_to_list(bat_priv, skb, 1); + batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false); /* don't hand the broadcast up if it is from an originator * from the same backbone. */ if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) - goto out; + goto free_skb; if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size)) goto rx_success; @@ -1192,6 +1234,8 @@ rx_success: spin_unlock: spin_unlock_bh(&orig_node->bcast_seqno_lock); +free_skb: + kfree_skb(skb); out: if (orig_node) batadv_orig_node_put(orig_node); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 8d4e1f578574..9ea272ef6612 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -64,8 +64,11 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work); * If neigh_node is NULL, then the packet is broadcasted using hard_iface, * otherwise it is sent as unicast to the given neighbor. * - * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb) - * otherwise + * Regardless of the return value, the skb is consumed. + * + * Return: A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. */ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, @@ -73,7 +76,6 @@ int batadv_send_skb_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; - int ret; bat_priv = netdev_priv(hard_iface->soft_iface); @@ -111,15 +113,8 @@ int batadv_send_skb_packet(struct sk_buff *skb, /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. - * - * a negative value cannot be returned because it could be interepreted - * as not consumed skb by callers of batadv_send_skb_to_orig. */ - ret = dev_queue_xmit(skb); - if (ret < 0) - ret = NET_XMIT_DROP; - - return ret; + return dev_queue_xmit(skb); send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; @@ -165,11 +160,9 @@ int batadv_send_unicast_skb(struct sk_buff *skb, * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the - * skb is buffered for later transmit or the NET_XMIT status returned by the + * Return: negative errno code on a failure, -EINPROGRESS if the skb is + * buffered for later transmit or the NET_XMIT status returned by the * lower routine if the packet has been passed down. - * - * If the returning value is not -1 the skb has been consumed. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -177,12 +170,14 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; - int ret = -1; + int ret; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); - if (!neigh_node) - goto out; + if (!neigh_node) { + ret = -EINVAL; + goto free_skb; + } /* Check if the skb is too large to send in one piece and fragment * it if needed. @@ -191,8 +186,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. */ ret = batadv_frag_send_packet(skb, orig_node, neigh_node); + /* skb was consumed */ + skb = NULL; - goto out; + goto put_neigh_node; } /* try to network code the packet, if it is received on an interface @@ -204,9 +201,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, else ret = batadv_send_unicast_skb(skb, neigh_node); -out: - if (neigh_node) - batadv_neigh_node_put(neigh_node); + /* skb was consumed */ + skb = NULL; + +put_neigh_node: + batadv_neigh_node_put(neigh_node); +free_skb: + kfree_skb(skb); return ret; } @@ -327,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; - int res, ret = NET_XMIT_DROP; + int ret = NET_XMIT_DROP; if (!orig_node) goto out; @@ -364,13 +365,12 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; - res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != -1) - ret = NET_XMIT_SUCCESS; + ret = batadv_send_skb_to_orig(skb, orig_node, NULL); + /* skb was consumed */ + skb = NULL; out: - if (ret == NET_XMIT_DROP) - kfree_skb(skb); + kfree_skb(skb); return ret; } @@ -451,13 +451,19 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_forw_packet_free - free a forwarding packet * @forw_packet: The packet to free + * @dropped: whether the packet is freed because is is dropped * * This frees a forwarding packet and releases any resources it might * have claimed. */ -void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, + bool dropped) { - kfree_skb(forw_packet->skb); + if (dropped) + kfree_skb(forw_packet->skb); + else + consume_skb(forw_packet->skb); + if (forw_packet->if_incoming) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) @@ -549,6 +555,7 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending + * @own_packet: true if it is a self-generated broadcast packet * * add a broadcast packet to the queue and setup timers. broadcast packets * are sent multiple times to increase probability for being received. @@ -560,9 +567,10 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, */ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, - unsigned long delay) + unsigned long delay, + bool own_packet) { - struct batadv_hard_iface *primary_if = NULL; + struct batadv_hard_iface *primary_if; struct batadv_forw_packet *forw_packet; struct batadv_bcast_packet *bcast_packet; struct sk_buff *newskb; @@ -586,9 +594,8 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, bcast_packet = (struct batadv_bcast_packet *)newskb->data; bcast_packet->ttl--; - skb_reset_mac_header(newskb); - forw_packet->skb = newskb; + forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -597,7 +604,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, return NETDEV_TX_OK; err_packet_free: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); err: return NETDEV_TX_BUSY; } @@ -605,11 +612,17 @@ err: static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; + struct batadv_hardif_neigh_node *neigh_node; struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; + struct batadv_bcast_packet *bcast_packet; struct sk_buff *skb1; struct net_device *soft_iface; struct batadv_priv *bat_priv; + bool dropped = false; + u8 *neigh_addr; + u8 *orig_neigh; + int ret = 0; delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, @@ -621,11 +634,17 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) hlist_del(&forw_packet->list); spin_unlock_bh(&bat_priv->forw_bcast_list_lock); - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { + dropped = true; goto out; + } - if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) + if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) { + dropped = true; goto out; + } + + bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data; /* rebroadcast packet */ rcu_read_lock(); @@ -636,6 +655,49 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (forw_packet->num_packets >= hard_iface->num_bcasts) continue; + if (forw_packet->own) { + neigh_node = NULL; + } else { + neigh_addr = eth_hdr(forw_packet->skb)->h_source; + neigh_node = batadv_hardif_neigh_get(hard_iface, + neigh_addr); + } + + orig_neigh = neigh_node ? neigh_node->orig : NULL; + + ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig, + orig_neigh); + + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n", + bcast_packet->orig, + hard_iface->net_dev->name, type); + + if (neigh_node) + batadv_hardif_neigh_put(neigh_node); + + continue; + } + + if (neigh_node) + batadv_hardif_neigh_put(neigh_node); + if (!kref_get_unless_zero(&hard_iface->refcount)) continue; @@ -658,7 +720,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) } out: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, dropped); } void @@ -699,7 +761,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); } } spin_unlock_bh(&bat_priv->forw_bcast_list_lock); @@ -726,7 +788,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); } } spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 999f78683d9e..c58019475025 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -27,7 +27,8 @@ struct sk_buff; -void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, + bool dropped); struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, @@ -46,7 +47,8 @@ int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh_node); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, - unsigned long delay); + unsigned long delay, + bool own_packet); void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 49e16b6e0ba3..7b3494ae6ad9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -22,6 +22,7 @@ #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/cpumask.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -116,6 +117,26 @@ static int batadv_interface_release(struct net_device *dev) return 0; } +/** + * batadv_sum_counter - Sum the cpu-local counters for index 'idx' + * @bat_priv: the bat priv with all the soft interface information + * @idx: index of counter to sum up + * + * Return: sum of all cpu-local counters + */ +static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) +{ + u64 *counters, sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + counters = per_cpu_ptr(bat_priv->bat_counters, cpu); + sum += counters[idx]; + } + + return sum; +} + static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); @@ -336,12 +357,12 @@ send: seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); - batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay); + batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true); /* a copy is stored in the bcast list, therefore removing * the original skb. */ - kfree_skb(skb); + consume_skb(skb); /* unicast packet */ } else { @@ -365,7 +386,7 @@ send: ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, vid); } - if (ret == NET_XMIT_DROP) + if (ret != NET_XMIT_SUCCESS) goto dropped_freed; } diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 02d96f224c60..17c844196eb2 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -33,7 +33,6 @@ #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/slab.h> -#include <linux/stat.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/stringify.h> @@ -666,41 +665,36 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, return count; } -BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); -BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(aggregated_ogms, 0644, NULL); +BATADV_ATTR_SIF_BOOL(bonding, 0644, NULL); #ifdef CONFIG_BATMAN_ADV_BLA -BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, - batadv_bla_status_update); +BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, 0644, batadv_bla_status_update); #endif #ifdef CONFIG_BATMAN_ADV_DAT -BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, - batadv_dat_status_update); +BATADV_ATTR_SIF_BOOL(distributed_arp_table, 0644, batadv_dat_status_update); #endif -BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); -static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL); -static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode, - batadv_store_gw_mode); -BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, - 2 * BATADV_JITTER, INT_MAX, NULL); -BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, - BATADV_TQ_MAX_VALUE, NULL); -static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class, +BATADV_ATTR_SIF_BOOL(fragmentation, 0644, batadv_update_min_mtu); +static BATADV_ATTR(routing_algo, 0444, batadv_show_bat_algo, NULL); +static BATADV_ATTR(gw_mode, 0644, batadv_show_gw_mode, batadv_store_gw_mode); +BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, 0644, 2 * BATADV_JITTER, + INT_MAX, NULL); +BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, 0644, 0, BATADV_TQ_MAX_VALUE, + NULL); +static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class, batadv_store_gw_sel_class); -static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, +static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_MCAST -BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL); #endif #ifdef CONFIG_BATMAN_ADV_DEBUG -BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0, - BATADV_DBG_ALL, NULL); +BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL); #endif #ifdef CONFIG_BATMAN_ADV_NC -BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, - batadv_nc_status_update); +BATADV_ATTR_SIF_BOOL(network_coding, 0644, batadv_nc_status_update); #endif -static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR, - batadv_show_isolation_mark, batadv_store_isolation_mark); +static BATADV_ATTR(isolation_mark, 0644, batadv_show_isolation_mark, + batadv_store_isolation_mark); static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -731,7 +725,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { NULL, }; -BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_VLAN_BOOL(ap_isolation, 0644, NULL); /* array of vlan specific sysfs attributes */ static struct batadv_attribute *batadv_vlan_attrs[] = { @@ -1116,14 +1110,13 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj, #endif -static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface, +static BATADV_ATTR(mesh_iface, 0644, batadv_show_mesh_iface, batadv_store_mesh_iface); -static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL); +static BATADV_ATTR(iface_status, 0444, batadv_show_iface_status, NULL); #ifdef CONFIG_BATMAN_ADV_BATMAN_V -BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR, +BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, 0644, 2 * BATADV_JITTER, INT_MAX, NULL); -static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR, - batadv_show_throughput_override, +static BATADV_ATTR(throughput_override, 0644, batadv_show_throughput_override, batadv_store_throughput_override); #endif diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2333777f919d..f1564520dfae 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -615,9 +615,6 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, batadv_tp_fill_prerandom(tp_vars, data, data_len); r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (r == -1) - kfree_skb(skb); - if (r == NET_XMIT_SUCCESS) return 0; @@ -1206,9 +1203,6 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /* send the ack */ r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (r == -1) - kfree_skb(skb); - if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { ret = BATADV_TP_REASON_DST_UNREACHABLE; goto out; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7f663092f6de..3cae8f4fd717 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -56,7 +56,6 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" -#include "multicast.h" #include "netlink.h" #include "originator.h" #include "packet.h" @@ -3795,9 +3794,6 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { lockdep_assert_held(&bat_priv->tt.commit_lock); - /* Update multicast addresses in local translation table */ - batadv_mcast_mla_update(bat_priv); - if (atomic_read(&bat_priv->tt.local_changes) < 1) { if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); @@ -3835,8 +3831,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, unsigned short vid) { - struct batadv_tt_local_entry *tt_local_entry = NULL; - struct batadv_tt_global_entry *tt_global_entry = NULL; + struct batadv_tt_local_entry *tt_local_entry; + struct batadv_tt_global_entry *tt_global_entry; struct batadv_softif_vlan *vlan; bool ret = false; @@ -3845,27 +3841,24 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, return false; if (!atomic_read(&vlan->ap_isolation)) - goto out; + goto vlan_put; tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); if (!tt_local_entry) - goto out; + goto vlan_put; tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid); if (!tt_global_entry) - goto out; - - if (!_batadv_is_ap_isolated(tt_local_entry, tt_global_entry)) - goto out; + goto local_entry_put; - ret = true; + if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry)) + ret = true; -out: + batadv_tt_global_entry_put(tt_global_entry); +local_entry_put: + batadv_tt_local_entry_put(tt_local_entry); +vlan_put: batadv_softif_vlan_put(vlan); - if (tt_global_entry) - batadv_tt_global_entry_put(tt_global_entry); - if (tt_local_entry) - batadv_tt_local_entry_put(tt_local_entry); return ret; } diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 77654f055f24..a783420356ae 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -600,7 +600,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, unsigned char *tvlv_buff; unsigned int tvlv_len; ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - int res; orig_node = batadv_orig_hash_find(bat_priv, dst); if (!orig_node) @@ -633,9 +632,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, tvlv_buff += sizeof(*tvlv_hdr); memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res == -1) - kfree_skb(skb); + batadv_send_skb_to_orig(skb, orig_node, NULL); out: batadv_orig_node_put(orig_node); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index b3dd1a381aad..98ebac05c571 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -123,8 +123,8 @@ struct batadv_hard_iface_bat_v { * @list: list node for batadv_hardif_list * @if_num: identificator of the interface * @if_status: status of the interface for batman-adv - * @net_dev: pointer to the net_device * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) + * @net_dev: pointer to the net_device * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -141,8 +141,8 @@ struct batadv_hard_iface { struct list_head list; s16 if_num; char if_status; - struct net_device *net_dev; u8 num_bcasts; + struct net_device *net_dev; struct kobject *hardif_obj; struct kref refcount; struct packet_type batman_adv_ptype; @@ -184,7 +184,7 @@ struct batadv_orig_ifinfo { /** * struct batadv_frag_table_entry - head in the fragment buffer table - * @head: head of list with fragments + * @fragment_list: head of list with fragments * @lock: lock to protect the list of fragments * @timestamp: time (jiffie) of last received fragment * @seqno: sequence number of the fragments in the list @@ -192,8 +192,8 @@ struct batadv_orig_ifinfo { * @total_size: expected size of the assembled packet */ struct batadv_frag_table_entry { - struct hlist_head head; - spinlock_t lock; /* protects head */ + struct hlist_head fragment_list; + spinlock_t lock; /* protects fragment_list */ unsigned long timestamp; u16 seqno; u16 size; @@ -408,6 +408,7 @@ struct batadv_hardif_neigh_node_bat_v { * struct batadv_hardif_neigh_node - unique neighbor per hard-interface * @list: list node for batadv_hard_iface::neigh_list * @addr: the MAC address of the neighboring interface + * @orig: the address of the originator this neighbor node belongs to * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received * @bat_v: B.A.T.M.A.N. V private data @@ -417,6 +418,7 @@ struct batadv_hardif_neigh_node_bat_v { struct batadv_hardif_neigh_node { struct hlist_node list; u8 addr[ETH_ALEN]; + u8 orig[ETH_ALEN]; struct batadv_hard_iface *if_incoming; unsigned long last_seen; #ifdef CONFIG_BATMAN_ADV_BATMAN_V @@ -706,8 +708,8 @@ struct batadv_priv_debug_log { /** * struct batadv_priv_gw - per mesh interface gateway data - * @list: list of available gateway nodes - * @list_lock: lock protecting gw_list & curr_gw + * @gateway_list: list of available gateway nodes + * @list_lock: lock protecting gateway_list & curr_gw * @curr_gw: pointer to currently selected gateway node * @mode: gateway operation: off, client or server (see batadv_gw_modes) * @sel_class: gateway selection class (applies if gw_mode client) @@ -716,8 +718,8 @@ struct batadv_priv_debug_log { * @reselect: bool indicating a gateway re-selection is in progress */ struct batadv_priv_gw { - struct hlist_head list; - spinlock_t list_lock; /* protects gw_list & curr_gw */ + struct hlist_head gateway_list; + spinlock_t list_lock; /* protects gateway_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ atomic_t mode; atomic_t sel_class; @@ -785,9 +787,10 @@ struct batadv_mcast_querier_state { * @num_want_all_ipv6: counter for items in want_all_ipv6_list * @want_lists_lock: lock for protecting modifications to mcast want lists * (traversals are rcu-locked) + * @work: work queue callback item for multicast TT and TVLV updates */ struct batadv_priv_mcast { - struct hlist_head mla_list; + struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */ struct hlist_head want_all_unsnoopables_list; struct hlist_head want_all_ipv4_list; struct hlist_head want_all_ipv6_list; @@ -802,6 +805,7 @@ struct batadv_priv_mcast { atomic_t num_want_all_ipv6; /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ spinlock_t want_lists_lock; + struct delayed_work work; }; #endif @@ -1363,7 +1367,7 @@ struct batadv_skb_cb { /** * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded - * @list: list node for batadv_socket_client::queue_list + * @list: list node for batadv_priv::forw_{bat,bcast}_list * @send_time: execution time for delayed_work (packet sending) * @own: bool for locally generated packets (local OGMs are re-scheduled after * sending) diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index f4fcb4a9d5c1..2b875edf77e1 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -211,7 +211,6 @@ static const struct net_device_ops bnep_netdev_ops = { .ndo_set_rx_mode = bnep_net_set_mc_list, .ndo_set_mac_address = bnep_net_set_mac_addr, .ndo_tx_timeout = bnep_net_timeout, - .ndo_change_mtu = eth_change_mtu, }; @@ -222,6 +221,8 @@ void bnep_net_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; ether_setup(dev); + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &bnep_netdev_ops; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 89a687f3c0a3..c08e02b67818 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -185,7 +185,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > br_min_mtu(br)) + if (new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; @@ -410,6 +410,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->ageing_time = BR_DEFAULT_AGEING_TIME; + dev->max_mtu = ETH_MAX_MTU; br_netfilter_rtable_init(br); br_stp_timer_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6b43c8c88f19..e4a4176171c9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -535,9 +535,8 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, */ if (fdb->is_local) return 0; - br_warn(br, "adding interface %s with same address " - "as a received packet\n", - source ? source->dev->name : br->dev->name); + br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", + source ? source->dev->name : br->dev->name, addr, vid); fdb_delete(br, fdb); } @@ -583,9 +582,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) - br_warn(br, "received packet on %s with " - "own address as source address\n", - source->dev->name); + br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", + source->dev->name, addr, vid); } else { /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst)) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2136e45f5277..073d54afa056 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/timer.h> #include <linux/inetdevice.h> +#include <linux/mroute.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -1638,6 +1639,21 @@ static void br_multicast_err_count(const struct net_bridge *br, u64_stats_update_end(&pstats->syncp); } +static void br_multicast_pim(struct net_bridge *br, + struct net_bridge_port *port, + const struct sk_buff *skb) +{ + unsigned int offset = skb_transport_offset(skb); + struct pimhdr *pimhdr, _pimhdr; + + pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr); + if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION || + pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) + return; + + br_multicast_mark_router(br, port); +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1650,8 +1666,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = ip_mc_check_igmp(skb, &skb_trimmed); if (err == -ENOMSG) { - if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { + if (ip_hdr(skb)->protocol == IPPROTO_PIM) + br_multicast_pim(br, port, skb); + } return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2fe9345c1407..8155bd2a5138 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, - in, skb->dev, br_forward_finish, 1); + br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, + br_forward_finish); return 0; } @@ -845,8 +845,10 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) - return NF_STOP; + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } return NF_ACCEPT; } @@ -1016,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, - NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, + sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, elem); rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 9cebf47ac840..e7ef1a1ef3a6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT config NF_LOG_BRIDGE tristate "Bridge packet logging" + select NF_LOG_COMMON endif # NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 070cf134a22f..5929309beaa1 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, + (struct net_device *)xt_in(par), *diptr, shp, info->mac, shp); return info->target; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 9a11086ba6ff..e88bd4827ac1 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = par->net; + struct net *net = xt_net(par); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; @@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo */ if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, - par->in, par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, "%s", + info->prefix); else - ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 54816150608e..c1dc48686200 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -23,16 +23,16 @@ static unsigned int ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 2e7c4f974340..8d2a85e0594e 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (par->hooknum != NF_BR_BROUTING) + if (xt_hooknum(par) != NF_BR_BROUTING) /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, - br_port_get_rcu(par->in)->br->dev->dev_addr); + br_port_get_rcu(xt_in(par))->br->dev->dev_addr); else - ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); + ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ec94c6f1ae88..8fe36dc3aab2 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb) struct nf_hook_state state; int ret; - nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f5c11bbe27db..1ab6014cf0f8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, const struct ebt_table_info *private; struct xt_action_param acpar; - acpar.family = NFPROTO_BRIDGE; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; + acpar.state = state; acpar.hotdrop = false; - acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 1663df598545..c197b1f844ee 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index ad47a921b701..5974dbc1ea24 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 4b3df6b0e3b9..206dc266ecd2 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmp_code(priv->icmp_code)); break; } @@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index fa39fc298708..273cb07f57d8 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -390,8 +390,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, rcu_read_lock(); if (adapt_layer == NULL) { - pr_debug("link setup response but no client exist," - "send linkdown back\n"); + pr_debug("link setup response but no client exist, send linkdown back\n"); cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); goto unlock; } @@ -401,8 +400,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); if (phyinfo == NULL) { - pr_err("ERROR: Link Layer Device disappeared" - "while connecting\n"); + pr_err("ERROR: Link Layer Device disappeared while connecting\n"); goto unlock; } @@ -436,8 +434,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); break; default: - pr_err("Protocol error. Link setup response " - "- unknown channel type\n"); + pr_err("Protocol error. Link setup response - unknown channel type\n"); goto unlock; } if (!servicel) diff --git a/net/core/datagram.c b/net/core/datagram.c index b7de71f8d5d3..49816af8586b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -165,6 +165,7 @@ done: * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @destructor: invoked under the receive lock on successful dequeue * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts @@ -197,6 +198,8 @@ done: * the standard around please. */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last) { @@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } atomic_inc(&skb->users); - } else + } else { __skb_unlink(skb, queue); - + if (destructor) + destructor(sk, skb); + } spin_unlock_irqrestore(&queue->lock, cpu_flags); *off = _off; return skb; @@ -262,6 +267,8 @@ no_packet: EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err) { struct sk_buff *skb, *last; @@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, - &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, + off, err, &last); if (skb) return skb; @@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, err); + NULL, &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); @@ -323,6 +330,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) } EXPORT_SYMBOL(__skb_free_datagram_locked); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags) +{ + int err = 0; + + if (flags & MSG_PEEK) { + err = -ENOENT; + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + err = 0; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL(__sk_queue_drop_skb); + /** * skb_kill_datagram - Free a datagram skbuff forcibly * @sk: socket @@ -346,23 +374,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = 0; - - if (flags & MSG_PEEK) { - err = -ENOENT; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - atomic_dec(&skb->users); - err = 0; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } + int err = __sk_queue_drop_skb(sk, skb, flags); kfree_skb(skb); - atomic_inc(&sk->sk_drops); sk_mem_reclaim_partial(sk); - return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 6666b28b6815..6deba68ad9e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -139,7 +139,6 @@ #include <linux/errqueue.h> #include <linux/hrtimer.h> #include <linux/netfilter_ingress.h> -#include <linux/sctp.h> #include <linux/crash_dump.h> #include "net-sysfs.h" @@ -1944,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) +{ + if (dev->num_tc) { + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { + if ((txq - tc->offset) < tc->count) + return i; + } + + return -1; + } + + return 0; +} + #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) -static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, - int cpu, u16 index) +static bool remove_xps_queue(struct xps_dev_maps *dev_maps, + int tci, u16 index) { struct xps_map *map = NULL; int pos; if (dev_maps) - map = xmap_dereference(dev_maps->cpu_map[cpu]); + map = xmap_dereference(dev_maps->cpu_map[tci]); + if (!map) + return false; - for (pos = 0; map && pos < map->len; pos++) { - if (map->queues[pos] == index) { - if (map->len > 1) { - map->queues[pos] = map->queues[--map->len]; - } else { - RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); - kfree_rcu(map, rcu); - map = NULL; - } + for (pos = map->len; pos--;) { + if (map->queues[pos] != index) + continue; + + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; break; } + + RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL); + kfree_rcu(map, rcu); + return false; } - return map; + return true; } -static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +static bool remove_xps_queue_cpu(struct net_device *dev, + struct xps_dev_maps *dev_maps, + int cpu, u16 offset, u16 count) +{ + int num_tc = dev->num_tc ? : 1; + bool active = false; + int tci; + + for (tci = cpu * num_tc; num_tc--; tci++) { + int i, j; + + for (i = count, j = offset; i--; j++) { + if (!remove_xps_queue(dev_maps, cpu, j)) + break; + } + + active |= i < 0; + } + + return active; +} + +static void netif_reset_xps_queues(struct net_device *dev, u16 offset, + u16 count) { struct xps_dev_maps *dev_maps; int cpu, i; @@ -1986,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) if (!dev_maps) goto out_no_maps; - for_each_possible_cpu(cpu) { - for (i = index; i < dev->num_tx_queues; i++) { - if (!remove_xps_queue(dev_maps, cpu, i)) - break; - } - if (i == dev->num_tx_queues) - active = true; - } + for_each_possible_cpu(cpu) + active |= remove_xps_queue_cpu(dev, dev_maps, cpu, + offset, count); if (!active) { RCU_INIT_POINTER(dev->xps_maps, NULL); kfree_rcu(dev_maps, rcu); } - for (i = index; i < dev->num_tx_queues; i++) + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), NUMA_NO_NODE); @@ -2008,6 +2045,11 @@ out_no_maps: mutex_unlock(&xps_map_mutex); } +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ + netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); +} + static struct xps_map *expand_xps_map(struct xps_map *map, int cpu, u16 index) { @@ -2047,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + int i, cpu, tci, numa_node_id = -2; + int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); - int cpu, numa_node_id = -2; bool active = false; + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + + maps_sz = XPS_DEV_MAPS_SIZE(num_tc); + if (maps_sz < L1_CACHE_BYTES) + maps_sz = L1_CACHE_BYTES; + mutex_lock(&xps_map_mutex); dev_maps = xmap_dereference(dev->xps_maps); /* allocate memory for queue storage */ - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, mask)) - continue; - + for_each_cpu_and(cpu, cpu_online_mask, mask) { if (!new_dev_maps) new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { @@ -2068,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, return -ENOMEM; } - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + tci = cpu * num_tc + tc; + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) : NULL; map = expand_xps_map(map, cpu, index); if (!map) goto error; - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } if (!new_dev_maps) goto out_no_new_maps; for_each_possible_cpu(cpu) { + /* copy maps belonging to foreign traffic classes */ + for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } + + /* We need to explicitly update tci as prevous loop + * could break out early if dev_maps is NULL. + */ + tci = cpu * num_tc + tc; + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { /* add queue to CPU maps */ int pos = 0; - map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(new_dev_maps->cpu_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@ -2100,26 +2163,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, #endif } else if (dev_maps) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[cpu]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } + /* copy maps belonging to foreign traffic classes */ + for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } } rcu_assign_pointer(dev->xps_maps, new_dev_maps); /* Cleanup old maps */ - if (dev_maps) { - for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (!dev_maps) + goto out_no_old_maps; + + for_each_possible_cpu(cpu) { + for (i = num_tc, tci = cpu * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = xmap_dereference(dev_maps->cpu_map[tci]); if (map && map != new_map) kfree_rcu(map, rcu); } - - kfree_rcu(dev_maps, rcu); } + kfree_rcu(dev_maps, rcu); + +out_no_old_maps: dev_maps = new_dev_maps; active = true; @@ -2134,11 +2207,12 @@ out_no_new_maps: /* removes queue from unused CPUs */ for_each_possible_cpu(cpu) { - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) - continue; - - if (remove_xps_queue(dev_maps, cpu, index)) - active = true; + for (i = tc, tci = cpu * num_tc; i--; tci++) + active |= remove_xps_queue(dev_maps, tci, index); + if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu)) + active |= remove_xps_queue(dev_maps, tci, index); + for (i = num_tc - tc, tci++; --i; tci++) + active |= remove_xps_queue(dev_maps, tci, index); } /* free map if not active */ @@ -2154,11 +2228,14 @@ out_no_maps: error: /* remove any maps that we added */ for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : - NULL; - if (new_map && new_map != map) - kfree(new_map); + for (i = num_tc, tci = cpu * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[tci]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } } mutex_unlock(&xps_map_mutex); @@ -2169,6 +2246,44 @@ error: EXPORT_SYMBOL(netif_set_xps_queue); #endif +void netdev_reset_tc(struct net_device *dev) +{ +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, 0); +#endif + dev->num_tc = 0; + memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); + memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} +EXPORT_SYMBOL(netdev_reset_tc); + +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ + if (tc >= dev->num_tc) + return -EINVAL; + +#ifdef CONFIG_XPS + netif_reset_xps_queues(dev, offset, count); +#endif + dev->tc_to_txq[tc].count = count; + dev->tc_to_txq[tc].offset = offset; + return 0; +} +EXPORT_SYMBOL(netdev_set_tc_queue); + +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ + if (num_tc > TC_MAX_QUEUE) + return -EINVAL; + +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, 0); +#endif + dev->num_tc = num_tc; + return 0; +} +EXPORT_SYMBOL(netdev_set_num_tc); + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -2487,141 +2602,6 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* skb_csum_offload_check - Driver helper function to determine if a device - * with limited checksum offload capabilities is able to offload the checksum - * for a given packet. - * - * Arguments: - * skb - sk_buff for the packet in question - * spec - contains the description of what device can offload - * csum_encapped - returns true if the checksum being offloaded is - * encpasulated. That is it is checksum for the transport header - * in the inner headers. - * checksum_help - when set indicates that helper function should - * call skb_checksum_help if offload checks fail - * - * Returns: - * true: Packet has passed the checksum checks and should be offloadable to - * the device (a driver may still need to check for additional - * restrictions of its device) - * false: Checksum is not offloadable. If checksum_help was set then - * skb_checksum_help was called to resolve checksum for non-GSO - * packets and when IP protocol is not SCTP - */ -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - struct iphdr *iph; - struct ipv6hdr *ipv6; - void *nhdr; - int protocol; - u8 ip_proto; - - if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) { - if (!spec->vlan_okay) - goto need_help; - } - - /* We check whether the checksum refers to a transport layer checksum in - * the outermost header or an encapsulated transport layer checksum that - * corresponds to the inner headers of the skb. If the checksum is for - * something else in the packet we need help. - */ - if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { - /* Non-encapsulated checksum */ - protocol = eproto_to_ipproto(vlan_get_protocol(skb)); - nhdr = skb_network_header(skb); - *csum_encapped = false; - if (spec->no_not_encapped) - goto need_help; - } else if (skb->encapsulation && spec->encap_okay && - skb_checksum_start_offset(skb) == - skb_inner_transport_offset(skb)) { - /* Encapsulated checksum */ - *csum_encapped = true; - switch (skb->inner_protocol_type) { - case ENCAP_TYPE_ETHER: - protocol = eproto_to_ipproto(skb->inner_protocol); - break; - case ENCAP_TYPE_IPPROTO: - protocol = skb->inner_protocol; - break; - } - nhdr = skb_inner_network_header(skb); - } else { - goto need_help; - } - - switch (protocol) { - case IPPROTO_IP: - if (!spec->ipv4_okay) - goto need_help; - iph = nhdr; - ip_proto = iph->protocol; - if (iph->ihl != 5 && !spec->ip_options_okay) - goto need_help; - break; - case IPPROTO_IPV6: - if (!spec->ipv6_okay) - goto need_help; - if (spec->no_encapped_ipv6 && *csum_encapped) - goto need_help; - ipv6 = nhdr; - nhdr += sizeof(*ipv6); - ip_proto = ipv6->nexthdr; - break; - default: - goto need_help; - } - -ip_proto_again: - switch (ip_proto) { - case IPPROTO_TCP: - if (!spec->tcp_okay || - skb->csum_offset != offsetof(struct tcphdr, check)) - goto need_help; - break; - case IPPROTO_UDP: - if (!spec->udp_okay || - skb->csum_offset != offsetof(struct udphdr, check)) - goto need_help; - break; - case IPPROTO_SCTP: - if (!spec->sctp_okay || - skb->csum_offset != offsetof(struct sctphdr, checksum)) - goto cant_help; - break; - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_DEST: { - u8 *opthdr = nhdr; - - if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) - goto need_help; - - ip_proto = opthdr[0]; - nhdr += (opthdr[1] + 1) << 3; - - goto ip_proto_again; - } - default: - goto need_help; - } - - /* Passed the tests for offloading checksum */ - return true; - -need_help: - if (csum_help && !skb_shinfo(skb)->gso_size) - skb_checksum_help(skb); -cant_help: - return false; -} -EXPORT_SYMBOL(__skb_csum_offload_chk); - __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; @@ -3216,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[skb->sender_cpu - 1]); + unsigned int tci = skb->sender_cpu - 1; + + if (dev->num_tc) { + tci *= dev->num_tc; + tci += netdev_get_prio_tc_map(dev, skb->priority); + } + + map = rcu_dereference(dev_maps->cpu_map[tci]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -4491,7 +4477,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) + if (skb->csum_bad) goto normal; gro_list_prepare(napi, skb); @@ -4504,7 +4490,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; - NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->recursion_counter = 0; @@ -5026,7 +5012,7 @@ EXPORT_SYMBOL(sk_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ -void napi_hash_add(struct napi_struct *napi) +static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) @@ -5046,7 +5032,6 @@ void napi_hash_add(struct napi_struct *napi) spin_unlock(&napi_hash_lock); } -EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi @@ -5270,6 +5255,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ + struct net_device *dev = data; + + return upper_dev == dev; +} + /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device @@ -5284,11 +5276,30 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); + return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); /** + * netdev_has_upper_dev_all - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks the entire upper device chain. + * The caller must hold rcu lock. + */ + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev) +{ + return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); + +/** * netdev_has_any_upper_dev - Check if device is linked to some device * @dev: device * @@ -5299,7 +5310,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->all_adj_list.upper); + return !list_empty(&dev->adj_list.upper); } /** @@ -5326,6 +5337,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +/** + * netdev_has_any_lower_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to a lower device and return true in case + * it is. The caller must hold the RTNL lock. + */ +static bool netdev_has_any_lower_dev(struct net_device *dev) +{ + ASSERT_RTNL(); + + return !list_empty(&dev->adj_list.lower); +} + void *netdev_adjacent_get_private(struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -5362,16 +5387,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next device from the dev's upper list, starting from iter - * position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; @@ -5379,14 +5396,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->all_adj_list.upper) + if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev_rcu(dev, &iter); + udev; + udev = netdev_next_upper_dev_rcu(dev, &iter)) { + /* first is the upper device itself */ + ret = fn(udev, data); + if (ret) + return ret; + + /* then look at all of its upper devices */ + ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); /** * netdev_lower_get_next_private - Get the next ->private from the @@ -5469,55 +5513,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) } EXPORT_SYMBOL(netdev_lower_get_next); -/** - * netdev_all_lower_get_next - Get the next device from all lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RTNL lock or - * its own locking that guarantees that the neighbour all lower - * list will remain unchanged. - */ -struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +static struct net_device *netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; - lower = list_entry(*iter, struct netdev_adjacent, list); + lower = list_entry((*iter)->next, struct netdev_adjacent, list); - if (&lower->list == &dev->all_adj_list.lower) + if (&lower->list == &dev->adj_list.lower) return NULL; - *iter = lower->list.next; + *iter = &lower->list; return lower->dev; } -EXPORT_SYMBOL(netdev_all_lower_get_next); -/** - * netdev_all_lower_get_next_rcu - Get the next device from all - * lower neighbour list, RCU variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev(dev, &iter); + ldev; + ldev = netdev_next_lower_dev(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev(ldev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); + +static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - - if (&lower->list == &dev->all_adj_list.lower) + if (&lower->list == &dev->adj_list.lower) return NULL; *iter = &lower->list; return lower->dev; } -EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev; + ldev = netdev_next_lower_dev_rcu(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu); /** * netdev_lower_get_first_private_rcu - Get the first ->private from the @@ -5590,7 +5669,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, - u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5600,7 +5678,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr += ref_nr; + adj->ref_nr += 1; + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n", + dev->name, adj_dev->name, adj->ref_nr); + return 0; } @@ -5610,12 +5691,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = ref_nr; + adj->ref_nr = 1; adj->private = private; dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of link added from %s to %s\n", - adj_dev->name, dev->name, adj_dev->name); + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", + dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); @@ -5654,17 +5735,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; + pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n", + dev->name, adj_dev->name, ref_nr); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { - pr_err("tried to remove device %s from %s\n", + pr_err("Adjacency does not exist for device %s from %s\n", dev->name, adj_dev->name); - BUG(); + WARN_ON(1); + return; } if (adj->ref_nr > ref_nr) { - pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, - ref_nr, adj->ref_nr-ref_nr); + pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", + dev->name, adj_dev->name, ref_nr, + adj->ref_nr - ref_nr); adj->ref_nr -= ref_nr; return; } @@ -5676,7 +5762,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because link removed from %s to %s\n", + pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); @@ -5684,38 +5770,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, - u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list); return ret; } return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower, - NULL, false); -} - static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, u16 ref_nr, @@ -5726,40 +5801,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); -} - static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); - return ret; - } - - return 0; + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); @@ -5770,7 +5824,6 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; ASSERT_RTNL(); @@ -5779,10 +5832,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) + if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) + if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5804,80 +5857,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - /* Now that we linked these devs, make all the upper_dev's - * all_adj_list.upper visible to every dev's all_adj_list.lower an - * versa, and don't forget the devices itself. All of these - * links are non-neighbours. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - pr_debug("Interlinking %s with %s, non-neighbour\n", - i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); - if (ret) - goto rollback_mesh; - } - } - - /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - pr_debug("linking %s's upper device %s with %s\n", - upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); - if (ret) - goto rollback_upper_mesh; - } - - /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - pr_debug("linking %s's lower device %s with %s\n", dev->name, - i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); - if (ret) - goto rollback_lower_mesh; - } - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) - goto rollback_lower_mesh; + goto rollback; return 0; -rollback_lower_mesh: - to_i = i; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - } - - i = NULL; - -rollback_upper_mesh: - to_i = i; - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - } - - i = j = NULL; - -rollback_mesh: - to_i = i; - to_j = j; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - if (i == to_i && j == to_j) - break; - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - } - if (i == to_i) - break; - } - +rollback: __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; @@ -5934,7 +5922,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j; ASSERT_RTNL(); changeupper_info.upper_dev = upper_dev; @@ -5946,23 +5933,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - /* Here is the tricky part. We must remove all dev's lower - * devices from all upper_dev's upper devices and vice - * versa, to maintain the graph relationship. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - - /* remove also the devices itself from lower/upper device - * list - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); } @@ -6500,9 +6470,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (new_mtu == dev->mtu) return 0; - /* MTU must be positive. */ - if (new_mtu < 0) + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", + dev->name, new_mtu, dev->min_mtu); return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", + dev->name, new_mtu, dev->max_mtu); + return -EINVAL; + } if (!netif_device_present(dev)) return -ENODEV; @@ -6777,6 +6756,7 @@ static void rollback_registered_many(struct list_head *head) /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); + WARN_ON(netdev_has_any_lower_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -7655,8 +7635,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); - INIT_LIST_HEAD(&dev->all_adj_list.upper); - INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); #ifdef CONFIG_NET_SCHED @@ -7667,7 +7645,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; - dev->tx_queue_len = 1; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; } dev->num_tx_queues = txqs; diff --git a/net/core/devlink.c b/net/core/devlink.c index 1b5063088f1a..c14f8b661db9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); } -static struct genl_family devlink_nl_family = { - .id = GENL_ID_GENERATE, - .name = DEVLINK_GENL_NAME, - .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .netnsok = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, -}; +static struct genl_family devlink_nl_family; enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, @@ -608,6 +600,8 @@ static int devlink_port_type_set(struct devlink *devlink, if (devlink->ops && devlink->ops->port_type_set) { if (port_type == DEVLINK_PORT_TYPE_NOTSET) return -EINVAL; + if (port_type == devlink_port->type) + return 0; err = devlink->ops->port_type_set(devlink_port, port_type); if (err) return err; @@ -1618,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = { }, }; +static struct genl_family devlink_nl_family __ro_after_init = { + .name = DEVLINK_GENL_NAME, + .version = DEVLINK_GENL_VERSION, + .maxattr = DEVLINK_ATTR_MAX, + .netnsok = true, + .pre_doit = devlink_nl_pre_doit, + .post_doit = devlink_nl_post_doit, + .module = THIS_MODULE, + .ops = devlink_nl_ops, + .n_ops = ARRAY_SIZE(devlink_nl_ops), + .mcgrps = devlink_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), +}; + /** * devlink_alloc - Allocate new devlink instance resources * @@ -1840,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); static int __init devlink_module_init(void) { - return genl_register_family_with_ops_groups(&devlink_nl_family, - devlink_nl_ops, - devlink_nl_mcgrps); + return genl_register_family(&devlink_nl_family); } static void __exit devlink_module_exit(void) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c61125..8e0c0635ee97 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -59,12 +59,7 @@ struct dm_hw_stat_delta { unsigned long last_drop_val; }; -static struct genl_family net_drop_monitor_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "NET_DM", - .version = 2, -}; +static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); @@ -351,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = { }, }; +static struct genl_family net_drop_monitor_family __ro_after_init = { + .hdrsize = 0, + .name = "NET_DM", + .version = 2, + .module = THIS_MODULE, + .ops = dropmon_ops, + .n_ops = ARRAY_SIZE(dropmon_ops), + .mcgrps = dropmon_mcgrps, + .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), +}; + static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; @@ -367,8 +373,7 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family_with_ops_groups(&net_drop_monitor_family, - dropmon_ops, dropmon_mcgrps); + rc = genl_register_family(&net_drop_monitor_family); if (rc) { pr_err("Could not create drop monitor netlink family\n"); return rc; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index be4629c344a6..b6791d94841d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include <net/fib_rules.h> #include <net/ip_tunnels.h> +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->l3mdev != rule->l3mdev) continue; + if (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end)) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size_64bit(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8) /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && - nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/filter.c b/net/core/filter.c index b391209838ef..dece94fef005 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2544,6 +2544,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 69e4463a4b1b..b481a4a6d3ec 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -726,7 +726,7 @@ EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; -u32 __skb_get_hash_symmetric(struct sk_buff *skb) +u32 __skb_get_hash_symmetric(const struct sk_buff *skb) { struct flow_keys keys; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e5f84c26ba1a..03976e939818 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: @@ -130,6 +132,19 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, } EXPORT_SYMBOL(lwtunnel_build_state); +void lwtstate_free(struct lwtunnel_state *lws) +{ + const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; + + if (ops->destroy_state) { + ops->destroy_state(lws); + kfree_rcu(lws, rcu); + } else { + kfree(lws); + } +} +EXPORT_SYMBOL(lwtstate_free); + int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6e4f34721080..b0c04cf4851d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { + struct kobject *kobj = &dev->_rx[i].kobj; + + if (!list_empty(&dev_net(dev)->exit_list)) + kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) - sysfs_remove_group(&dev->_rx[i].kobj, - dev->sysfs_rx_queue_group); - kobject_put(&dev->_rx[i].kobj); + sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + kobject_put(kobj); } return error; @@ -1021,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } -#ifdef CONFIG_XPS static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; @@ -1033,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } +static ssize_t show_traffic_class(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + struct net_device *dev = queue->dev; + int index = get_netdev_queue_index(queue); + int tc = netdev_txq_to_tc(dev, index); + + if (tc < 0) + return -EINVAL; + + return sprintf(buf, "%u\n", tc); +} + +#ifdef CONFIG_XPS static ssize_t show_tx_maxrate(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) @@ -1075,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate = static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); +static struct netdev_queue_attribute queue_traffic_class = + __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL); + #ifdef CONFIG_BQL /* * Byte queue limits sysfs structures and functions. @@ -1190,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { struct net_device *dev = queue->dev; + int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; index = get_netdev_queue_index(queue); + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - for_each_possible_cpu(i) { - struct xps_map *map = - rcu_dereference(dev_maps->cpu_map[i]); - if (map) { - int j; - for (j = 0; j < map->len; j++) { - if (map->queues[j] == index) { - cpumask_set_cpu(i, mask); - break; - } + for_each_possible_cpu(cpu) { + int i, tci = cpu * num_tc + tc; + struct xps_map *map; + + map = rcu_dereference(dev_maps->cpu_map[tci]); + if (!map) + continue; + + for (i = map->len; i--;) { + if (map->queues[i] == index) { + cpumask_set_cpu(cpu, mask); + break; } } } @@ -1260,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute = static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, + &queue_traffic_class.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, &queue_tx_maxrate.attr, @@ -1340,6 +1370,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; + if (!list_empty(&dev_net(dev)->exit_list)) + queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); #endif @@ -1525,6 +1557,9 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &(ndev->dev); + if (!list_empty(&dev_net(ndev)->exit_list)) + dev_set_uevent_suppress(dev, 1); + kobject_get(&dev->kobj); remove_queue_kobjects(ndev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f61c0e02a413..1309d78e2a64 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -382,7 +382,14 @@ struct net *copy_net_ns(unsigned long flags, get_user_ns(user_ns); - mutex_lock(&net_mutex); + rv = mutex_lock_killable(&net_mutex); + if (rv < 0) { + net_free(net); + dec_net_namespaces(ucounts); + put_user_ns(user_ns); + return ERR_PTR(rv); + } + net->ucounts = ucounts; rv = setup_net(net, user_ns); if (rv == 0) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e3e0087245b..0b2a6e94af2d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3725,7 +3725,6 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; spin_unlock_irqrestore(&q->lock, flags); - sk->sk_err = err; if (err) sk->sk_error_report(sk); diff --git a/net/core/sock.c b/net/core/sock.c index 5e3ca414357e..14e6145be33b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2080,37 +2080,31 @@ void __sk_flush_backlog(struct sock *sk) */ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc; - DEFINE_WAIT(wait); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); /** - * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * __sk_mem_raise_allocated - increase memory_allocated * @sk: socket * @size: memory size to allocate + * @amt: pages to allocate * @kind: allocation type * - * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means - * rmem allocation. This function assumes that protocols which have - * memory_pressure use sk_wmem_queued as write buffer accounting. + * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc */ -int __sk_mem_schedule(struct sock *sk, int size, int kind) +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; - int amt = sk_mem_pages(size); - long allocated; - - sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - - allocated = sk_memory_allocated_add(sk, amt); + long allocated = sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg && !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) @@ -2171,9 +2165,6 @@ suppress_allocation: trace_sock_exceed_buf_limit(sk, prot, allocated); - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - sk_memory_allocated_sub(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) @@ -2181,18 +2172,40 @@ suppress_allocation: return 0; } +EXPORT_SYMBOL(__sk_mem_raise_allocated); + +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + int ret, amt = sk_mem_pages(size); + + sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + ret = __sk_mem_raise_allocated(sk, size, amt, kind); + if (!ret) + sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + return ret; +} EXPORT_SYMBOL(__sk_mem_schedule); /** - * __sk_mem_reclaim - reclaim memory_allocated + * __sk_mem_reduce_allocated - reclaim memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of quanta + * + * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc */ -void __sk_mem_reclaim(struct sock *sk, int amount) +void __sk_mem_reduce_allocated(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; sk_memory_allocated_sub(sk, amount); - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); @@ -2201,6 +2214,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } +EXPORT_SYMBOL(__sk_mem_reduce_allocated); + +/** + * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated + * @sk: socket + * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + */ +void __sk_mem_reclaim(struct sock *sk, int amount) +{ + amount >>= SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + __sk_mem_reduce_allocated(sk, amount); +} EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) @@ -2436,8 +2462,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/core/stream.c b/net/core/stream.c index 1086c8b280a8..f575bcf64af2 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -53,8 +53,8 @@ void sk_stream_write_space(struct sock *sk) */ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct task_struct *tsk = current; - DEFINE_WAIT(wait); int done; do { @@ -68,13 +68,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & - ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk_sleep(sk), &wait); + ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -94,16 +94,16 @@ static inline int sk_stream_closing(struct sock *sk) void sk_stream_wait_close(struct sock *sk, long timeout) { if (timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); do { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) + if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait)) break; } while (!signal_pending(current) && timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } } EXPORT_SYMBOL(sk_stream_wait_close); @@ -119,16 +119,16 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) long vm_wait = 0; long current_timeo = *timeo_p; bool noblock = (*timeo_p ? false : true); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; + add_wait_queue(sk_sleep(sk), &wait); + while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; if (!*timeo_p) { @@ -147,7 +147,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) sk_wait_event(sk, ¤t_timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || (sk_stream_memory_free(sk) && - !vm_wait)); + !vm_wait), &wait); sk->sk_write_pending--; if (vm_wait) { @@ -161,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b567c8725aea..fda321d814d6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -590,13 +590,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (inet_csk_reqsk_queue_is_full(sk)) goto drop; - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 715e5d1dc107..adfc790f7193 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -326,7 +326,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (inet_csk_reqsk_queue_is_full(sk)) goto drop; - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 13d6b1a6e0fc..a90ed67027b0 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1718,7 +1718,7 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, * See if there is data ready to read, sleep if there isn't */ for(;;) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk->sk_err) goto out; @@ -1749,11 +1749,11 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, goto out; } - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); + sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -1999,19 +1999,19 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) * size. */ if (dn_queue_too_long(scp, queue, flags)) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (flags & MSG_DONTWAIT) { err = -EWOULDBLOCK; goto out; } - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, - !dn_queue_too_long(scp, queue, flags)); + !dn_queue_too_long(scp, queue, flags), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); continue; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6b1282c006b1..d0c7bce88743 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -641,7 +641,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* ethtool operations *******************************************************/ static int -dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +dsa_slave_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); int err; @@ -650,19 +651,20 @@ dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (p->phy != NULL) { err = phy_read_status(p->phy); if (err == 0) - err = phy_ethtool_gset(p->phy, cmd); + err = phy_ethtool_ksettings_get(p->phy, cmd); } return err; } static int -dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +dsa_slave_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); if (p->phy != NULL) - return phy_ethtool_sset(p->phy, cmd); + return phy_ethtool_ksettings_set(p->phy, cmd); return -EOPNOTSUPP; } @@ -990,8 +992,6 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) } static const struct ethtool_ops dsa_slave_ethtool_ops = { - .get_settings = dsa_slave_get_settings, - .set_settings = dsa_slave_set_settings, .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, .get_regs = dsa_slave_get_regs, @@ -1007,6 +1007,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, + .get_link_ksettings = dsa_slave_get_link_ksettings, + .set_link_ksettings = dsa_slave_set_link_ksettings, }; static const struct net_device_ops dsa_slave_netdev_ops = { @@ -1245,6 +1247,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + slave_dev->min_mtu = 0; + slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 02acfff36028..8c5a479681ca 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,6 +62,7 @@ #include <net/dsa.h> #include <net/flow_dissector.h> #include <linux/uaccess.h> +#include <net/pkt_sched.h> __setup("ether=", netdev_boot_setup); @@ -322,8 +323,7 @@ EXPORT_SYMBOL(eth_mac_addr); */ int eth_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) - return -EINVAL; + netdev_warn(dev, "%s is deprecated\n", __func__); dev->mtu = new_mtu; return 0; } @@ -357,8 +357,10 @@ void ether_setup(struct net_device *dev) dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; - dev->tx_queue_len = 1000; /* Ethernet wants good queues */ + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 16737cd8dae8..fc65b145f6e7 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev) random_ether_addr(dev->dev_addr); ether_setup(dev); + dev->min_mtu = 0; dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index d4d1617f43a8..1ab30e7d3f99 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, }; -static struct genl_family hsr_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "HSR", - .version = 1, - .maxattr = HSR_A_MAX, -}; +static struct genl_family hsr_genl_family; static const struct genl_multicast_group hsr_mcgrps[] = { { .name = "hsr-network", }, @@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = { }, }; +static struct genl_family hsr_genl_family __ro_after_init = { + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, + .module = THIS_MODULE, + .ops = hsr_ops, + .n_ops = ARRAY_SIZE(hsr_ops), + .mcgrps = hsr_mcgrps, + .n_mcgrps = ARRAY_SIZE(hsr_mcgrps), +}; + int __init hsr_netlink_init(void) { int rc; @@ -475,8 +481,7 @@ int __init hsr_netlink_init(void) if (rc) goto fail_rtnl_link_register; - rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops, - hsr_mcgrps); + rc = genl_register_family(&hsr_genl_family); if (rc) goto fail_genl_register_family; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8133c07ceee..6bde9e5a5503 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -28,14 +28,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); -struct genl_family nl802154_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = IEEE802154_NL_NAME, - .version = 1, - .maxattr = IEEE802154_ATTR_MAX, -}; - /* Requests to userspace */ struct sk_buff *ieee802154_nl_create(int flags, u8 req) { @@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = { [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, }; +struct genl_family nl802154_family __ro_after_init = { + .hdrsize = 0, + .name = IEEE802154_NL_NAME, + .version = 1, + .maxattr = IEEE802154_ATTR_MAX, + .module = THIS_MODULE, + .ops = ieee8021154_ops, + .n_ops = ARRAY_SIZE(ieee8021154_ops), + .mcgrps = ieee802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), +}; + int __init ieee802154_nl_init(void) { - return genl_register_family_with_ops_groups(&nl802154_family, - ieee8021154_ops, - ieee802154_mcgrps); + return genl_register_family(&nl802154_family); } void ieee802154_nl_exit(void) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d90a4ed5b8a0..fc60cd061f39 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -26,23 +26,8 @@ #include "rdev-ops.h" #include "core.h" -static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - -static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl802154_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = NL802154_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL802154_ATTR_MAX, - .netnsok = true, - .pre_doit = nl802154_pre_doit, - .post_doit = nl802154_post_doit, -}; +static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { @@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - nl802154_fam.attrbuf, nl802154_fam.maxattr, + genl_family_attrbuf(&nl802154_fam), + nl802154_fam.maxattr, nl802154_policy); if (err) goto out_unlock; *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), - nl802154_fam.attrbuf); + genl_family_attrbuf(&nl802154_fam)); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; @@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { - struct nlattr **tb = nl802154_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, nl802154_fam.maxattr, nl802154_policy); @@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; +static struct genl_family nl802154_fam __ro_after_init = { + .name = NL802154_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL802154_ATTR_MAX, + .netnsok = true, + .pre_doit = nl802154_pre_doit, + .post_doit = nl802154_post_doit, + .module = THIS_MODULE, + .ops = nl802154_ops, + .n_ops = ARRAY_SIZE(nl802154_ops), + .mcgrps = nl802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), +}; + /* initialisation/exit functions */ -int nl802154_init(void) +int __init nl802154_init(void) { - return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops, - nl802154_mcgrps); + return genl_register_family(&nl802154_fam); } void nl802154_exit(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 300b06888fdf..28e051a8e847 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -430,6 +430,14 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_RAW_DIAG + tristate "RAW: socket monitoring interface" + depends on INET_DIAG && (IPV6 || IPV6=n) + default n + ---help--- + Support for RAW socket monitoring interface used by the ss tool. + If unsure, say Y. + config INET_DIAG_DESTROY bool "INET: allow privileged process to administratively close sockets" depends on INET_DIAG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bc6a6c8b9bcd..48af58a5686e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o +obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..d93eea8e2409 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -610,6 +610,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 030d1531e897..805f6607f8d9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) return err; } -static struct genl_family fou_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = FOU_GENL_NAME, - .version = FOU_GENL_VERSION, - .maxattr = FOU_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, @@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = { }, }; +static struct genl_family fou_nl_family __ro_after_init = { + .hdrsize = 0, + .name = FOU_GENL_NAME, + .version = FOU_GENL_VERSION, + .maxattr = FOU_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = fou_nl_ops, + .n_ops = ARRAY_SIZE(fou_nl_ops), +}; + size_t fou_encap_hlen(struct ip_tunnel_encap *e) { return sizeof(struct udphdr); @@ -1086,8 +1090,7 @@ static int __init fou_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&fou_nl_family, - fou_nl_ops); + ret = genl_register_family(&fou_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 48734ee6293f..691146abde2d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61a9deec2993..d5d3ead0a6c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e4d16fc5bbb3..4dea33e5f295 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; + /* + * RAW sockets might have user-defined protocols assigned, + * so report the one supplied on socket creation. + */ + if (sk->sk_type == SOCK_RAW) { + if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) + goto errout; + } + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; @@ -852,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + int i, num, s_i, s_num; + struct sock *sk; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -863,16 +873,15 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, s_num = num = cb->args[2]; if (cb->args[0] == 0) { - if (!(idiag_states & TCPF_LISTEN)) + if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) goto skip_listen_ht; for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; - spin_lock_bh(&ilb->lock); + spin_lock(&ilb->lock); sk_for_each(sk, &ilb->head) { struct inet_sock *inet = inet_sk(sk); @@ -892,26 +901,18 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, r->id.idiag_sport) goto next_listen; - if (r->id.idiag_dport || - cb->args[3] > 0) - goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); goto done; } next_listen: - cb->args[3] = 0; - cb->args[4] = 0; ++num; } - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); s_num = 0; - cb->args[3] = 0; - cb->args[4] = 0; } skip_listen_ht: cb->args[0] = 1; @@ -921,13 +922,14 @@ skip_listen_ht: if (!(idiag_states & ~TCPF_LISTEN)) goto out; +#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; - struct sock *sk; - - num = 0; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; @@ -935,9 +937,12 @@ skip_listen_ht: if (i > s_i) s_num = 0; +next_chunk: + num = 0; + accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int state, res; + int state; if (!net_eq(sock_net(sk), net)) continue; @@ -961,21 +966,35 @@ skip_listen_ht: if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - res = sk_diag_fill(sk, skb, r, + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); - if (res < 0) { - spin_unlock_bh(lock); - goto done; + if (res < 0) + num = num_arr[idx]; } -next_normal: - ++num; + sock_gen_put(sk_arr[idx]); } - - spin_unlock_bh(lock); + if (res < 0) + break; cond_resched(); + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } } done: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 105908d841a3..eaf720b65d7e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1592,7 +1592,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b8a2d63d1fb8..8b13881ed064 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) +{ + int val; + + if (IPCB(skb)->frag_max_size == 0) + return; + + val = IPCB(skb)->frag_max_size; + put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); +} + static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset) { @@ -153,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int tlen, int offset) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset) { - struct inet_sock *inet = inet_sk(skb->sk); + struct inet_sock *inet = inet_sk(sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ @@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, if (flags & IP_CMSG_CHECKSUM) ip_cmsg_recv_checksum(msg, skb, tlen, offset); + + if (flags & IP_CMSG_RECVFRAGSIZE) + ip_cmsg_recv_fragsize(msg, skb); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: + case IP_RECVFRAGSIZE: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, } } break; + case IP_RECVFRAGSIZE: + if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) + goto e_inval; + if (val) + inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; + else + inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_CHECKSUM: val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; break; + case IP_RECVFRAGSIZE: + val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5719d6ba0824..12a92e3349ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -358,6 +358,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, { struct ip_tunnel *nt; struct net_device *dev; + int t_hlen; BUG_ON(!itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); @@ -367,6 +368,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, dev->mtu = ip_tunnel_bind_dev(dev); nt = netdev_priv(dev); + t_hlen = nt->hlen + sizeof(struct iphdr); + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; ip_tunnel_add(itn, nt); return nt; } @@ -929,7 +933,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) int t_hlen = tunnel->hlen + sizeof(struct iphdr); int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; if (new_mtu > max_mtu) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 27089f5ebbb1..665505d86b12 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp4)); + err = fib_rules_lookup(net->ipv4.mr_rules_ops, flowi4_to_flowi(flp4), 0, &arg); if (err < 0) @@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, return -EINVAL; } - mrt = ipmr_get_table(rule->fr_net, rule->table); + arg->table = fib_rule_get_table(rule, arg); + + mrt = ipmr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { + struct net_device *mdev; + + mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); + if (mdev == skb->dev) + goto forward; + if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -2053,7 +2064,7 @@ static int pim_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) || + if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || (pim->flags & PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d613309e3e5d..c11eb1744ab1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV4 + tristate "IPv4 socket lookup support" + help + This option enables the IPv4 socket lookup infrastructure. This is + is required by the iptables socket match. + if NF_TABLES config NF_TABLES_IPV4 @@ -54,6 +60,14 @@ config NFT_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. +config NFT_FIB_IPV4 + select NFT_FIB + tristate "nf_tables fib / ip route lookup support" + help + This module enables IPv4 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 853328f8fd05..f462fee66ac8 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o +obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o + # logging obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o @@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b31df597fd37..39004da318e2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.hooknum = hook; - acpar.family = NFPROTO_ARP; + acpar.state = state; acpar.hotdrop = false; arp = arp_hdr(skb); @@ -809,7 +805,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -838,7 +834,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(NFPROTO_ARP); @@ -863,7 +859,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; if (get.size == private->size) @@ -875,7 +871,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; return ret; } @@ -902,8 +898,8 @@ static int __do_replace(struct net *net, const char *name, t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free_newinfo_counters_untrans; } @@ -1018,8 +1014,8 @@ static int do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free; } @@ -1408,7 +1404,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1423,7 +1419,7 @@ static int compat_get_entries(struct net *net, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; xt_compat_unlock(NFPROTO_ARP); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7c00ce90adb8..46815c8a60d7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV4; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); local_bh_disable(); @@ -977,7 +973,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET); @@ -1032,7 +1028,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1043,7 +1039,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; return ret; } @@ -1068,8 +1064,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free_newinfo_counters_untrans; } @@ -1184,8 +1180,8 @@ do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET, tmp.name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free; } @@ -1630,7 +1626,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1644,7 +1640,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; xt_compat_unlock(AF_INET); return ret; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index da7f02a0b868..34cfb9b0bc0a 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -55,7 +55,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) range.min_proto = mr->range[0].min; range.max_proto = mr->range[0].max; - return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out); + return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range, + xt_out(par)); } static struct xt_target masquerade_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1d16c0f28df0..8bd0d7b26632 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,7 +34,7 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; - int hook = par->hooknum; + int hook = xt_hooknum(par); switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: @@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(par->net, skb, hook); + nf_send_reset(xt_net(par), skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index db5b87509446..361411688221 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -263,12 +263,12 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 78cc64eddfc1..59b49945b481 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -95,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 713c09a74b90..7130ed5dc1fa 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -336,47 +336,34 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); +static struct nf_conntrack_l4proto *builtin_l4proto4[] = { + &nf_conntrack_l4proto_tcp4, + &nf_conntrack_l4proto_udp4, + &nf_conntrack_l4proto_icmp, +}; + static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); - if (ret < 0) { - pr_err("nf_conntrack_tcp4: pernet registration failed\n"); - goto out_tcp; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); - if (ret < 0) { - pr_err("nf_conntrack_udp4: pernet registration failed\n"); - goto out_udp; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); - if (ret < 0) { - pr_err("nf_conntrack_icmp4: pernet registration failed\n"); - goto out_icmp; - } + ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); + if (ret < 0) + return ret; ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: pernet registration failed\n"); - goto out_ipv4; + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } - return 0; -out_ipv4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); -out_icmp: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); -out_udp: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); -out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } static struct pernet_operations ipv4_net_ops = { @@ -410,37 +397,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_pernet; } - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + ret = nf_ct_l4proto_register(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); + if (ret < 0) goto cleanup_hooks; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); - goto cleanup_tcp4; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); - goto cleanup_udp4; - } ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); - goto cleanup_icmpv4; + goto cleanup_l4proto; } return ret; - cleanup_icmpv4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); - cleanup_udp4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); - cleanup_tcp4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); +cleanup_l4proto: + nf_ct_l4proto_unregister(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); cleanup_pernet: @@ -454,9 +425,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_unregister(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c new file mode 100644 index 000000000000..a83d558e1aae --- /dev/null +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/netfilter/nf_socket.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, + __be32 *raddr, __be32 *laddr, + __be16 *rport, __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + u8 uninitialized_var(protocol); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif + int doff = 0; + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index 0c01a270bf9f..0af3d8df70dd 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; - nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); + nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c new file mode 100644 index 000000000000..1b49966484b3 --- /dev/null +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -0,0 +1,238 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +#include <net/ip_fib.h> +#include <net/route.h> + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool fib4_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +#define DSCP_BITS 0xfc + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dst = ®s->data[priv->dreg]; + const struct net_device *dev = NULL; + const struct iphdr *iph; + __be32 addr; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + dev = nft_out(pkt); + + iph = ip_hdr(pkt->skb); + if (priv->flags & NFTA_FIB_F_DADDR) + addr = iph->daddr; + else + addr = iph->saddr; + + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval_type); + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + const struct iphdr *iph; + struct fib_result res; + struct flowi4 fl4 = { + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_iif = LOOPBACK_IFINDEX, + }; + const struct net_device *oif; + struct net_device *found; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int i; +#endif + + /* + * Do not set flowi4_oif, it restricts results (for example, asking + * for oif 3 will get RTN_UNICAST result even if the daddr exits + * on another interface. + * + * Search results for the desired outinterface instead. + */ + if (priv->flags & NFTA_FIB_F_OIF) + oif = nft_out(pkt); + else if (priv->flags & NFTA_FIB_F_IIF) + oif = nft_in(pkt); + else + oif = NULL; + + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + iph = ip_hdr(pkt->skb); + if (ipv4_is_multicast(iph->daddr) && + ipv4_is_zeronet(iph->saddr) && + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv->result, pkt, + get_ifindex(pkt->skb->dev)); + return; + } + + if (priv->flags & NFTA_FIB_F_MARK) + fl4.flowi4_mark = pkt->skb->mark; + + fl4.flowi4_tos = iph->tos & DSCP_BITS; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl4.daddr = iph->daddr; + fl4.saddr = get_saddr(iph->saddr); + } else { + fl4.daddr = iph->saddr; + fl4.saddr = get_saddr(iph->daddr); + } + + if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return; + + switch (res.type) { + case RTN_UNICAST: + break; + case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + return; + default: + break; + } + + if (!oif) { + found = FIB_RES_DEV(res); + goto ok; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (i = 0; i < res.fi->fib_nhs; i++) { + struct fib_nh *nh = &res.fi->fib_nh[i]; + + if (nh->nh_dev == oif) { + found = nh->nh_dev; + goto ok; + } + } + return; +#else + found = FIB_RES_DEV(res); + if (found != oif) + return; +#endif +ok: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = found->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, found->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib4_eval); + +static struct nft_expr_type nft_fib4_type; + +static const struct nft_expr_ops nft_fib4_type_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib4_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib4_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib4_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib4_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib4_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib4_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib4_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, +}; + +static int __init nft_fib4_module_init(void) +{ + return nft_register_expr(&nft_fib4_type); +} + +static void __exit nft_fib4_module_exit(void) +{ + nft_unregister_expr(&nft_fib4_type); +} + +module_init(nft_fib4_module_init); +module_exit(nft_fib4_module_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 51ced81b616c..4f697e431811 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -31,8 +31,8 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, range.max_proto.all = *(__be16 *)®s->data[priv->sreg_proto_max]; } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, - &range, pkt->out); + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), + &range, nft_out(pkt)); } static struct nft_expr_type nft_masq_ipv4_type; diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index c09d4381427e..16df0493c5ce 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -35,8 +35,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, mr.range[0].flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, - pkt->hook); + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 2c2553b9026c..517ce93699de 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,10 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); + nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->net, pkt->skb, pkt->hook); + nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; default: break; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..d11129f1178d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -789,7 +789,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ecbe5a7c2d6d..2300fae11b22 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -89,9 +89,10 @@ struct raw_frag_vec { int hlen; }; -static struct raw_hashinfo raw_v4_hashinfo = { +struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { @@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(raw_unhash_sk); -static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { sk_for_each_from(sk) { @@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v4_lookup); /* * 0 - deliver @@ -604,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; @@ -693,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int ret = -EINVAL; int chk_addr_ret; if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); + + if (sk->sk_bound_dev_if) + tb_id = l3mdev_fib_table_by_index(sock_net(sk), + sk->sk_bound_dev_if) ? : tb_id; + + chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr, + tb_id); + ret = -EADDRNOTAVAIL; if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -912,6 +922,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg } #endif +int raw_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + __udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(raw_abort); + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -937,6 +961,7 @@ struct proto raw_prot = { .compat_getsockopt = compat_raw_getsockopt, .compat_ioctl = compat_raw_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c new file mode 100644 index 000000000000..e1a51ca68d23 --- /dev/null +++ b/net/ipv4/raw_diag.c @@ -0,0 +1,266 @@ +#include <linux/module.h> + +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> + +#include <net/inet_sock.h> +#include <net/raw.h> +#include <net/rawv6.h> + +#ifdef pr_fmt +# undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +static struct raw_hashinfo * +raw_get_hashinfo(const struct inet_diag_req_v2 *r) +{ + if (r->sdiag_family == AF_INET) { + return &raw_v4_hashinfo; +#if IS_ENABLED(CONFIG_IPV6) + } else if (r->sdiag_family == AF_INET6) { + return &raw_v6_hashinfo; +#endif + } else { + pr_warn_once("Unexpected inet family %d\n", + r->sdiag_family); + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); + } +} + +/* + * Due to requirement of not breaking user API we can't simply + * rename @pad field in inet_diag_req_v2 structure, instead + * use helper to figure it out. + */ + +static struct sock *raw_lookup(struct net *net, struct sock *from, + const struct inet_diag_req_v2 *req) +{ + struct inet_diag_req_raw *r = (void *)req; + struct sock *sk = NULL; + + if (r->sdiag_family == AF_INET) + sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, + r->id.idiag_dst[0], + r->id.idiag_src[0], + r->id.idiag_if); +#if IS_ENABLED(CONFIG_IPV6) + else + sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, + (const struct in6_addr *)r->id.idiag_src, + (const struct in6_addr *)r->id.idiag_dst, + r->id.idiag_if); +#endif + return sk; +} + +static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) +{ + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct sock *sk = NULL, *s; + int slot; + + if (IS_ERR(hashinfo)) + return ERR_CAST(hashinfo); + + read_lock(&hashinfo->lock); + for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { + sk_for_each(s, &hashinfo->ht[slot]) { + sk = raw_lookup(net, s, r); + if (sk) { + /* + * Grab it and keep until we fill + * diag meaage to be reported, so + * caller should call sock_put then. + * We can do that because we're keeping + * hashinfo->lock here. + */ + sock_hold(sk); + goto out_unlock; + } + } + } +out_unlock: + read_unlock(&hashinfo->lock); + + return sk ? sk : ERR_PTR(-ENOENT); +} + +static int raw_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); + if (!rep) { + sock_put(sk); + return -ENOMEM; + } + + err = inet_sk_diag_fill(sk, NULL, rep, r, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); + sock_put(sk); + + if (err < 0) { + kfree_skb(rep); + return err; + } + + err = netlink_unicast(net->diag_nlsk, rep, + NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; + return err; +} + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc, bool net_admin) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh, net_admin); +} + +static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; + struct sock *sk = NULL; + + if (IS_ERR(hashinfo)) + return; + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + read_lock(&hashinfo->lock); + for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { + num = 0; + + sk_for_each(sk, &hashinfo->ht[slot]) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + if (sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) + goto out_unlock; +next: + num++; + } + } + +out_unlock: + read_unlock(&hashinfo->lock); + + cb->args[0] = slot; + cb->args[1] = num; +} + +static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); +} + +#ifdef CONFIG_INET_DIAG_DESTROY +static int raw_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + int err; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + err = sock_diag_destroy(sk, ECONNABORTED); + sock_put(sk); + return err; +} +#endif + +static const struct inet_diag_handler raw_diag_handler = { + .dump = raw_diag_dump, + .dump_one = raw_diag_dump_one, + .idiag_get_info = raw_diag_get_info, + .idiag_type = IPPROTO_RAW, + .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = raw_diag_destroy, +#endif +}; + +static void __always_unused __check_inet_diag_req_raw(void) +{ + /* + * Make sure the two structures are identical, + * except the @pad field. + */ +#define __offset_mismatch(m1, m2) \ + (offsetof(struct inet_diag_req_v2, m1) != \ + offsetof(struct inet_diag_req_raw, m2)) + + BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) != + sizeof(struct inet_diag_req_raw)); + BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family)); + BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext)); + BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states)); + BUILD_BUG_ON(__offset_mismatch(id, id)); +#undef __offset_mismatch +} + +static int __init raw_diag_init(void) +{ + return inet_diag_register(&raw_diag_handler); +} + +static void __exit raw_diag_exit(void) +{ + inet_diag_unregister(&raw_diag_handler); +} + +module_init(raw_diag_init); +module_exit(raw_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2a57566e6e91..d37fc6f7e679 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,7 +524,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -535,7 +537,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +554,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -802,7 +804,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1020,7 +1022,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1036,7 +1038,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1055,6 +1057,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1068,7 +1071,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1108,7 +1111,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1123,9 +1126,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); @@ -1982,25 +1986,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); + int our = 0; + + if (in_dev) + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + + /* check l3 master if no match yet */ + if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + struct in_device *l3_in_dev; + + l3_in_dev = __in_dev_get_rcu(skb->dev); + if (l3_in_dev) + our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + } - if (in_dev) { - int our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); - if (our + res = -EINVAL; + if (our #ifdef CONFIG_IP_MROUTE - || - (!ipv4_is_local_multicast(daddr) && - IN_DEV_MFORWARD(in_dev)) + || + (!ipv4_is_local_multicast(daddr) && + IN_DEV_MFORWARD(in_dev)) #endif - ) { - int res = ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); - rcu_read_unlock(); - return res; - } + ) { + res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); } rcu_read_unlock(); - return -EINVAL; + return res; } res = ip_route_input_slow(skb, daddr, saddr, tos, dev); rcu_read_unlock(); @@ -2268,7 +2282,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + (ipv4_is_multicast(fl4->daddr) || + !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2495,6 +2510,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2547,6 +2567,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2574,6 +2595,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2581,6 +2606,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..0dc6286272aa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -372,7 +372,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 814af89c1bd3..b025a69ebd28 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,6 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> -#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_min_tso_segs __read_mostly = 2; @@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2710,9 +2708,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2721,6 +2718,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_state = sk_state_load(sk); + /* Report meaningful fields for all TCP states, including listeners */ + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + info->tcpi_pacing_rate = rate64; + + rate = READ_ONCE(sk->sk_max_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + info->tcpi_max_pacing_rate = rate64; + + info->tcpi_reordering = tp->reordering; + info->tcpi_snd_cwnd = tp->snd_cwnd; + + if (info->tcpi_state == TCP_LISTEN) { + /* listeners aliased fields : + * tcpi_unacked -> Number of children ready for accept() + * tcpi_sacked -> max backlog + */ + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + return; + } info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2748,13 +2766,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (info->tcpi_state == TCP_LISTEN) { - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; - } else { - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - } + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2768,34 +2782,24 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = tp->srtt_us >> 3; info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; - info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; - info->tcpi_reordering = tp->reordering; info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; - rate = READ_ONCE(sk->sk_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_pacing_rate); + slow = lock_sock_fast(sk); - rate = READ_ONCE(sk->sk_max_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_max_pacing_rate); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); + + unlock_sock_fast(sk, slow); - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; @@ -2806,7 +2810,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (rate && intv) { rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; do_div(rate64, intv); - put_unaligned(rate64, &info->tcpi_delivery_rate); + info->tcpi_delivery_rate = rate64; } } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 0ea66c2c9344..b89bce4c721e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -14,6 +14,36 @@ * observed, or adjust the sending rate if it estimates there is a * traffic policer, in order to keep the drop rate reasonable. * + * Here is a state transition diagram for BBR: + * + * | + * V + * +---> STARTUP ----+ + * | | | + * | V | + * | DRAIN ----+ + * | | | + * | V | + * +---> PROBE_BW ----+ + * | ^ | | + * | | | | + * | +----+ | + * | | + * +---- PROBE_RTT <--+ + * + * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. + * When it estimates the pipe is full, it enters DRAIN to drain the queue. + * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. + * A long-lived BBR flow spends the vast majority of its time remaining + * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth + * in a fair manner, with a small, bounded queue. *If* a flow has been + * continuously sending for the entire min_rtt window, and hasn't seen an RTT + * sample that matches or decreases its min_rtt estimate for 10 seconds, then + * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe + * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if + * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; + * otherwise we enter STARTUP to try to fill the pipe. + * * BBR is described in detail in: * "BBR: Congestion-Based Congestion Control", * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, @@ -51,7 +81,7 @@ enum bbr_mode { BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ BBR_DRAIN, /* drain any queue created during startup */ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ - BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ + BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; /* BBR congestion control block */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a27b9c0e27c0..a70046fea0e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) u32 delta = ack - tp->snd_una; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) u32 delta = seq - tp->rcv_nxt; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; } @@ -6298,13 +6294,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + if (sk_acceptq_is_full(sk)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242..5555eb86e549 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +712,7 @@ out: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, @@ -1908,7 +1911,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock_bh(&ilb->lock); + spin_lock(&ilb->lock); sk = sk_head(&ilb->head); st->offset = 0; goto get_sk; @@ -1925,7 +1928,7 @@ get_sk: if (sk->sk_family == st->family) return sk; } - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) goto get_head; @@ -2133,7 +2136,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bf1f3b2b29d1..d46f4d5b1c62 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -742,14 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static struct genl_family tcp_metrics_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = TCP_METRICS_GENL_NAME, - .version = TCP_METRICS_GENL_VERSION, - .maxattr = TCP_METRICS_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family tcp_metrics_nl_family; static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, @@ -1116,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { }, }; +static struct genl_family tcp_metrics_nl_family __ro_after_init = { + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tcp_metrics_nl_ops, + .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops), +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -1179,8 +1183,7 @@ void __init tcp_metrics_init(void) if (ret < 0) panic("Could not allocate the tcp_metrics hash table\n"); - ret = genl_register_family_with_ops(&tcp_metrics_nl_family, - tcp_metrics_nl_ops); + ret = genl_register_family(&tcp_metrics_nl_family); if (ret < 0) panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 896e9dfbdb5c..f57b5aa51b59 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2529,8 +2529,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_unlink_write_queue(next_skb, sk); - skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), - next_skb_size); + if (next_skb_size) + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), + next_skb_size); if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; @@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return false; - /* TODO: SACK collapsing could be used to remove this condition */ - if (skb_shinfo(skb)->nr_frags != 0) - return false; if (skb_cloned(skb)) return false; if (skb == tcp_send_head(sk)) return false; - /* Some heurestics for collapsing over SACK'd could be invented */ + /* Some heuristics for collapsing over SACK'd could be invented */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) return false; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d123d68f4d1d..c827e4ea509e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -580,7 +580,8 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); * Does increment socket refcount. */ #if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ - IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \ + IS_ENABLED(CONFIG_NF_SOCKET_IPV4) struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { @@ -1019,7 +1020,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); @@ -1172,6 +1174,119 @@ out: return ret; } +/* fully reclaim rmem/fwd memory allocated for skb */ +static void udp_rmem_release(struct sock *sk, int size, int partial) +{ + int amt; + + atomic_sub(size, &sk->sk_rmem_alloc); + sk->sk_forward_alloc += size; + amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); + sk->sk_forward_alloc -= amt; + + if (amt) + __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); +} + +/* Note: called with sk_receive_queue.lock held */ +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) +{ + udp_rmem_release(sk, skb->truesize, 1); +} +EXPORT_SYMBOL(udp_skb_destructor); + +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff_head *list = &sk->sk_receive_queue; + int rmem, delta, amt, err = -ENOMEM; + int size = skb->truesize; + + /* try to avoid the costly atomic add/sub pair when the receive + * queue is full; always allow at least a packet + */ + rmem = atomic_read(&sk->sk_rmem_alloc); + if (rmem && (rmem + size > sk->sk_rcvbuf)) + goto drop; + + /* we drop only if the receive buf is full and the receive + * queue contains some other skb + */ + rmem = atomic_add_return(size, &sk->sk_rmem_alloc); + if ((rmem > sk->sk_rcvbuf) && (rmem > size)) + goto uncharge_drop; + + spin_lock(&list->lock); + if (size >= sk->sk_forward_alloc) { + amt = sk_mem_pages(size); + delta = amt << SK_MEM_QUANTUM_SHIFT; + if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { + err = -ENOBUFS; + spin_unlock(&list->lock); + goto uncharge_drop; + } + + sk->sk_forward_alloc += delta; + } + + sk->sk_forward_alloc -= size; + + /* no need to setup a destructor, we will explicitly release the + * forward allocated memory on dequeue + */ + skb->dev = NULL; + sock_skb_set_dropcount(sk, skb); + + __skb_queue_tail(list, skb); + spin_unlock(&list->lock); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk); + + return 0; + +uncharge_drop: + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + +drop: + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); + +static void udp_destruct_sock(struct sock *sk) +{ + /* reclaim completely the forward allocated memory */ + unsigned int total = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + total += skb->truesize; + kfree_skb(skb); + } + udp_rmem_release(sk, total, 0); + + inet_sock_destruct(sk); +} + +int udp_init_sock(struct sock *sk) +{ + sk->sk_destruct = udp_destruct_sock; + return 0; +} +EXPORT_SYMBOL_GPL(udp_init_sock); + +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) +{ + if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { + bool slow = lock_sock_fast(sk); + + sk_peek_offset_bwd(sk, len); + unlock_sock_fast(sk, slow); + } + consume_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_consume_udp); + /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1181,12 +1296,11 @@ out: */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; + int total = 0; int res; - __skb_queue_head_init(&list_kill); - spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { @@ -1196,18 +1310,13 @@ static int first_packet_length(struct sock *sk) IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); - __skb_queue_tail(&list_kill, skb); + total += skb->truesize; + kfree_skb(skb); } res = skb ? skb->len : -1; + if (total) + udp_rmem_release(sk, total, 1); spin_unlock_bh(&rcvq->lock); - - if (!skb_queue_empty(&list_kill)) { - bool slow = lock_sock_fast(sk); - - __skb_queue_purge(&list_kill); - sk_mem_reclaim_partial(sk); - unlock_sock_fast(sk, slow); - } return res; } @@ -1256,15 +1365,13 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; - bool slow; if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len, addr_len); try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -1297,13 +1404,12 @@ try_again: } if (unlikely(err)) { - trace_kfree_skb(skb, udp_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - skb_free_datagram_locked(sk, skb); + kfree_skb(skb); return err; } @@ -1322,22 +1428,21 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) err = ulen; - __skb_free_datagram_locked(sk, skb, peeking ? -err : err); + skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: - slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - unlock_sock_fast(sk, slow); + kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -1465,7 +1570,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = __sock_queue_rcv_skb(sk, skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1480,7 +1585,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; - } static struct static_key udp_encap_needed __read_mostly; @@ -1502,7 +1606,6 @@ EXPORT_SYMBOL(udp_encap_enable); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; int is_udplite = IS_UDPLITE(sk); /* @@ -1589,25 +1692,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; udp_csum_pull_header(skb); - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); - goto drop; - } - - rc = 0; ipv4_pktinfo_prepare(sk, skb); - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - - return rc; + return __udp_queue_rcv_skb(sk, skb); csum_error: __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -2217,13 +2304,13 @@ struct proto udp_prot = { .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, + .init = udp_init_sock, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = __udp_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..0f00811a785f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,4 +289,28 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion " + depends on IPV6 + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPSec AH. Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..129cad2ba960 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o seg6_iptunnel.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o +obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..86219c0a0104 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -238,6 +238,10 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -284,6 +288,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; /* Check if a valid qdisc is available */ @@ -4944,6 +4952,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif } static inline size_t inet6_ifla6_size(void) @@ -6036,6 +6048,22 @@ static const struct ctl_table addrconf_sysctl[] = { }, { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..d424f3a3737a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> +#include <net/seg6.h> #include <asm/uaccess.h> #include <linux/mroute6.h> @@ -678,6 +679,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); @@ -990,6 +992,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1000,8 +1006,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..c5d76d2edd26 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; @@ -715,6 +716,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..218f0cba231c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..926818c331e5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,11 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif +#include <linux/seg6.h> +#include <net/seg6.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif #include <linux/uaccess.h> @@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + bool cleanup = false; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) { + kfree_skb(skb); + return -1; + } +#endif + +looped_back: + if (hdr->segments_left > 0) { + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && + sr_has_cleanup(hdr)) + cleanup = true; + } else { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + kfree_skb(skb); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + if (cleanup) { + int srhlen = (hdr->hdrlen + 1) << 3; + int nh = hdr->nexthdr; + + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); + memmove(skb_network_header(skb) + srhlen, + skb_network_header(skb), + (unsigned char *)hdr - skb_network_header(skb)); + skb->network_header += srhlen; + ipv6_hdr(skb)->nexthdr = nh; + ipv6_hdr(skb)->payload_len = htons(skb->len - + sizeof(struct ipv6hdr)); + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); + } + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + /* be sure that srh is still present before reinjecting */ + if (!cleanup) { + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { @@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * for headers. */ -static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p) +static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, *proto = NEXTHDR_ROUTING; } +static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; + int plen, hops; + + sr_ihdr = (struct ipv6_sr_hdr *)opt; + plen = (sr_ihdr->hdrlen + 1) << 3; + + sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen); + memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); + + hops = sr_ihdr->first_segment + 1; + memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1, + (hops - 1) * sizeof(struct in6_addr)); + + sr_phdr->segments[0] = **addr_p; + *addr_p = &sr_ihdr->segments[hops - 1]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(sr_phdr)) { + struct net *net = NULL; + + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + + WARN_ON(!net); + + if (net) + seg6_push_hmac(net, saddr, sr_phdr); + } +#endif + + sr_phdr->nexthdr = *proto; + *proto = NEXTHDR_ROUTING; +} + +static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + switch (opt->type) { + case IPV6_SRCRT_TYPE_0: + ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + break; + case IPV6_SRCRT_TYPE_4: + ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); + break; + default: + break; + } +} + static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); @@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, - struct in6_addr **daddr) + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). @@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, return NULL; *orig = fl6->daddr; - fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + + switch (opt->srcrt->type) { + case IPV6_SRCRT_TYPE_0: + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + break; + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; + + fl6->daddr = srh->segments[srh->first_segment]; + break; + } + default: + return NULL; + } + return orig; } EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7370ad2e693a..eb948ffd734b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -484,6 +485,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -658,6 +660,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index e50c27a93e17..a7bc54ab46e2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -6,29 +6,88 @@ #include <linux/socket.h> #include <linux/types.h> #include <net/checksum.h> +#include <net/dst_cache.h> #include <net/ip.h> #include <net/ip6_fib.h> +#include <net/ip6_route.h> #include <net/lwtunnel.h> #include <net/protocol.h> #include <uapi/linux/ila.h> #include "ila.h" +struct ila_lwt { + struct ila_params p; + struct dst_cache dst_cache; + u32 connected : 1; +}; + +static inline struct ila_lwt *ila_lwt_lwtunnel( + struct lwtunnel_state *lwt) +{ + return (struct ila_lwt *)lwt->data; +} + static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) + struct lwtunnel_state *lwt) { - return (struct ila_params *)lwstate->data; + return &ila_lwt_lwtunnel(lwt)->p; } static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *orig_dst = skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); + struct dst_entry *dst; + int err = -EINVAL; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), + true); - return dst->lwtstate->orig_output(net, sk, skb); + if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { + /* Already have a next hop address in route, no need for + * dest cache route. + */ + return orig_dst->lwtstate->orig_output(net, sk, skb); + } + + dst = dst_cache_get(&ilwt->dst_cache); + if (unlikely(!dst)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct flowi6 fl6; + + /* Lookup a route for the new destination. Take into + * account that the base route may already have a gateway. + */ + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, + &ip6h->daddr); + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = -EHOSTUNREACH; + dst_release(dst); + goto drop; + } + + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto drop; + } + + if (ilwt->connected) + dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); + } + + skb_dst_set(skb, dst); + return dst_output(net, sk, skb); drop: kfree_skb(skb); @@ -60,9 +119,9 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { + struct ila_lwt *ilwt; struct ila_params *p; struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*p); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; @@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { /* Need to have full locator and at least type field * included in destination */ @@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - newts = lwtunnel_state_alloc(encap_len); + newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) return -ENOMEM; - newts->len = encap_len; + ilwt = ila_lwt_lwtunnel(newts); + ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC); + if (ret) { + kfree(newts); + return ret; + } + p = ila_params_lwtunnel(newts); p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); @@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | LWTUNNEL_STATE_INPUT_REDIRECT; + if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr)) + ilwt->connected = 1; + *ts = newts; return 0; } +static void ila_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache); +} + static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -159,6 +232,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops ila_encap_ops = { .build_state = ila_build_state, + .destroy_state = ila_destroy_state, .output = ila_output, .input = ila_input, .fill_encap = ila_fill_encap_info, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e604013dd814..af8f52ee7180 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version = ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; +static struct genl_family ila_nl_family; static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, @@ -482,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, GFP_KERNEL); @@ -490,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb) static int ila_nl_dump_done(struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; rhashtable_walk_exit(&iter->rhiter); + kfree(iter); + return 0; } static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; struct ila_map *ila; int ret; @@ -561,6 +563,18 @@ static const struct genl_ops ila_nl_ops[] = { }, }; +static struct genl_family ila_nl_family __ro_after_init = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + #define ILA_HASH_TABLE_SIZE 1024 static __net_init int ila_init_net(struct net *net) @@ -623,7 +637,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) return 0; } -int ila_xlat_init(void) +int __init ila_xlat_init(void) { int ret; @@ -631,8 +645,7 @@ int ila_xlat_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&ila_nl_family, - ila_nl_ops); + ret = genl_register_family(&ila_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..1c86c478f578 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..710bc79f9113 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59eb4ed99ce8..b37054b1873d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -203,7 +203,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); } skb_push(skb, sizeof(struct ipv6hdr)); @@ -1672,7 +1673,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); + ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 87784560dc46..259e8507d2cd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1157,7 +1157,7 @@ route_lookup: if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); } /* Calculate max headroom for all the headers and adjust @@ -1240,6 +1240,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1318,6 +1320,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1637,7 +1641,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) struct ip6_tnl *tnl = netdev_priv(dev); if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < 68) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } else { if (new_mtu < IPV6_MIN_MTU) @@ -1790,6 +1794,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len; return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8a02ca8a11af..af3f0e011265 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; @@ -812,30 +813,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -/** - * vti6_tnl_change_mtu - change mtu manually for tunnel device - * @dev: virtual device associated with tunnel - * @new_mtu: the new mtu - * - * Return: - * 0 on success, - * %-EINVAL if mtu too small - **/ -static int vti6_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < IPV6_MIN_MTU) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, - .ndo_change_mtu = vti6_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -855,6 +837,8 @@ static void vti6_dev_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = IP_MAX_MTU; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7f4265b1649b..52101b37ad6e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || + if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, sizeof(*pim), IPPROTO_PIM, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 636ec56f5f50..3ba530373560 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -52,6 +52,7 @@ #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/seg6.h> #include <asm/uaccess.h> @@ -430,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; #endif + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) + opt->srcrt; + + if (!seg6_validate_srh(srh, optlen)) + goto sticky_done; + break; + } default: goto sticky_done; } @@ -868,6 +878,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1310,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e10a04c9cdc7..6acb2eecd986 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV6 + tristate "IPv6 socket lookup support" + help + This option enables the IPv6 socket lookup infrastructure. This + is used by the ip6tables socket match. + if NF_TABLES config NF_TABLES_IPV6 @@ -54,6 +60,14 @@ config NFT_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. +config NFT_FIB_IPV6 + tristate "nf_tables fib / ipv6 route lookup support" + select NFT_FIB + help + This module enables IPv6 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV6 endif # NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b4f7d0b4e2af..fe180c96040e 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o + # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o @@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 55aacea24396..6ff42b8301cc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV6; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1037,7 +1033,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET6); @@ -1063,7 +1059,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1074,7 +1070,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; return ret; } @@ -1099,8 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free_newinfo_counters_untrans; } @@ -1214,8 +1210,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free; } @@ -1651,7 +1647,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1665,7 +1661,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; xt_compat_unlock(AF_INET6); return ret; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 7f9f45d829d2..2b1a15846f9a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -24,7 +24,7 @@ static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index db29bbf41b59..fa51a205918d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,35 +39,40 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par)); break; case IP6T_ICMP6_ADM_PROHIBITED: - nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, + xt_hooknum(par)); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, + xt_hooknum(par)); break; case IP6T_ICMP6_ADDR_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_PORT_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - nf_send_reset6(net, skb, par->hooknum); + nf_send_reset6(net, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: - nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); break; case IP6T_ICMP6_REJECT_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, + xt_hooknum(par)); break; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 06bed74cf5ee..99a1216287c8 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -277,12 +277,12 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 1ee1b25df096..d5263dc364a9 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), + info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 963ee3848675..500be28ff563 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -336,47 +336,35 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; +static struct nf_conntrack_l4proto *builtin_l4proto6[] = { + &nf_conntrack_l4proto_tcp6, + &nf_conntrack_l4proto_udp6, + &nf_conntrack_l4proto_icmpv6, +}; + static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_tcp6: pernet registration failed\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_udp6: pernet registration failed\n"); - goto cleanup_tcp6; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_icmp6: pernet registration failed\n"); - goto cleanup_udp6; - } + ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) + return ret; + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); - goto cleanup_icmpv6; + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } - return 0; - cleanup_icmpv6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); - out: return ret; } static void ipv6_net_exit(struct net *net) { nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } static struct pernet_operations ipv6_net_ops = { @@ -409,37 +397,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) goto cleanup_pernet; } - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + ret = nf_ct_l4proto_register(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) goto cleanup_hooks; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); - goto cleanup_tcp6; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); - goto cleanup_udp6; - } ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); - goto cleanup_icmpv6; + goto cleanup_l4proto; } return ret; - - cleanup_icmpv6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); +cleanup_l4proto: + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); cleanup_hooks: nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: @@ -453,9 +424,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c new file mode 100644 index 000000000000..ebb2bf84232a --- /dev/null +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/inet6_hashtables.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/netfilter/nf_socket.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp6_fields(const struct sk_buff *skb, + unsigned int outside_hdrlen, + int *protocol, + const struct in6_addr **raddr, + const struct in6_addr **laddr, + __be16 *rport, + __be16 *lport, + struct ipv6hdr *ipv6_var) +{ + const struct ipv6hdr *inside_iph; + struct icmp6hdr *icmph, _icmph; + __be16 *ports, _ports[2]; + u8 inside_nexthdr; + __be16 inside_fragoff; + int inside_hdrlen; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); + if (inside_iph == NULL) + return 1; + inside_nexthdr = inside_iph->nexthdr; + + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), + &inside_nexthdr, &inside_fragoff); + if (inside_hdrlen < 0) + return 1; /* hjm: Packet has no/incomplete transport layer headers. */ + + if (inside_nexthdr != IPPROTO_TCP && + inside_nexthdr != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, inside_hdrlen, + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_nexthdr; + *laddr = &inside_iph->saddr; + *lport = ports[0]; + *raddr = &inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet6_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + + return NULL; +} + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; + int thoff = 0, tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NULL; + } + + if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + saddr = &iph->saddr; + sport = hp->source; + daddr = &iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); + + } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, + &sport, &dport, &ipv6_var)) + return NULL; + } else { + return NULL; + } + + return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, + sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure"); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 831f86e1ec08..d8b5b60b7d53 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; - nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); + nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c new file mode 100644 index 000000000000..d526bb594956 --- /dev/null +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -0,0 +1,275 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +#include <net/ip6_fib.h> +#include <net/ip6_route.h> + +static bool fib6_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *)skb_dst(skb); + + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, + const struct nft_pktinfo *pkt, + const struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); + int lookup_flags = 0; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl6->daddr = iph->daddr; + fl6->saddr = iph->saddr; + } else { + fl6->daddr = iph->saddr; + fl6->saddr = iph->daddr; + } + + if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } + + if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) + lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; + + if (priv->flags & NFTA_FIB_F_MARK) + fl6->flowi6_mark = pkt->skb->mark; + + fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + + return lookup_flags; +} + +static u32 __nft_fib6_eval_type(const struct nft_fib *priv, + const struct nft_pktinfo *pkt) +{ + const struct net_device *dev = NULL; + const struct nf_ipv6_ops *v6ops; + const struct nf_afinfo *afinfo; + int route_err, addrtype; + struct rt6_info *rt; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + u32 ret = 0; + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (!afinfo) + return RTN_UNREACHABLE; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + dev = nft_out(pkt); + + nft_fib6_flowi_init(&fl6, priv, pkt, dev); + + v6ops = nf_get_ipv6_ops(); + if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + ret = RTN_LOCAL; + + route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); + if (route_err) + goto err; + + if (rt->rt6i_flags & RTF_REJECT) { + route_err = rt->dst.error; + dst_release(&rt->dst); + goto err; + } + + if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + ret = RTN_ANYCAST; + else if (!dev && rt->rt6i_flags & RTF_LOCAL) + ret = RTN_LOCAL; + + dst_release(&rt->dst); + + if (ret) + return ret; + + addrtype = ipv6_addr_type(&fl6.daddr); + + if (addrtype & IPV6_ADDR_MULTICAST) + return RTN_MULTICAST; + if (addrtype & IPV6_ADDR_UNICAST) + return RTN_UNICAST; + + return RTN_UNSPEC; + err: + switch (route_err) { + case -EINVAL: + return RTN_BLACKHOLE; + case -EACCES: + return RTN_PROHIBIT; + case -EAGAIN: + return RTN_THROW; + default: + break; + } + + return RTN_UNREACHABLE; +} + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + + *dest = __nft_fib6_eval_type(priv, pkt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval_type); + +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + const struct net_device *oif = NULL; + u32 *dest = ®s->data[priv->dreg]; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + struct rt6_info *rt; + int lookup_flags; + + if (priv->flags & NFTA_FIB_F_IIF) + oif = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + oif = nft_out(pkt); + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + *dest = 0; + again: + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); + if (rt->dst.error) + goto put_rt_err; + + /* Should not see RTF_LOCAL here */ + if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + goto put_rt_err; + + if (oif && oif != rt->rt6i_idev->dev) { + /* multipath route? Try again with F_IFACE */ + if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = oif->ifindex; + ip6_rt_put(rt); + goto again; + } + } + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = rt->rt6i_idev->dev->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } + + put_rt_err: + ip6_rt_put(rt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval); + +static struct nft_expr_type nft_fib6_type; + +static const struct nft_expr_ops nft_fib6_type_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib6_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib6_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib6_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib6_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib6_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib6_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib6_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, +}; + +static int __init nft_fib6_module_init(void) +{ + return nft_register_expr(&nft_fib6_type); +} + +static void __exit nft_fib6_module_exit(void) +{ + nft_unregister_expr(&nft_fib6_type); +} +module_init(nft_fib6_module_init); +module_exit(nft_fib6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 9597ffb74077..a2aff1277b40 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -32,7 +32,8 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, range.max_proto.all = *(__be16 *)®s->data[priv->sreg_proto_max]; } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); } static struct nft_expr_type nft_masq_ipv6_type; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index aca44e89a881..bfcd5af6bc15 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -35,7 +35,8 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); + regs->verdict.code = + nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 92bda9908bb9..057deeaff1cb 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code, + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; default: break; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 66e2d9dfc43a..e1f8b34d7a2e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 054a1d84fc5e..291ebc260e70 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -65,11 +65,12 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -static struct raw_hashinfo raw_v6_hashinfo = { +struct raw_hashinfo raw_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { @@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v6_lookup); /* * 0 - deliver @@ -774,6 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; @@ -1259,6 +1262,7 @@ struct proto rawv6_prot = { .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3815e8505ed2..e1da5b888cc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, { struct sk_buff *prev, *next; struct net_device *dev; - int offset, end; + int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; @@ -336,6 +336,10 @@ found: fq->ecn |= ecn; add_frag_mem_limit(fq->q.net, skb->truesize); + fragsize = -skb_network_offset(skb) + skb->len; + if (fragsize > fq->q.max_size) + fq->q.max_size = fragsize; + /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ @@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + IP6CB(head)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ skb_postpush_rcsum(head, skb_network_header(head), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b57e11e6e0d..b317bb135ed4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1408,7 +1408,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1420,6 +1420,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1433,7 +1434,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1525,7 +1526,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1538,6 +1540,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1559,6 +1562,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1567,7 +1571,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); @@ -2801,6 +2806,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3375,6 +3381,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..50f6e0663d1d --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,487 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/slab.h> + +#include <net/ipv6.h> +#include <net/protocol.h> + +#include <net/seg6.h> +#include <net/genetlink.h> +#include <linux/seg6.h> +#include <linux/seg6_genl.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) +{ + int trailing; + unsigned int tlv_offset; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (((srh->hdrlen + 1) << 3) != len) + return false; + + if (srh->segments_left != srh->first_segment) + return false; + + tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); + + trailing = len - tlv_offset; + if (trailing < 0) + return false; + + while (trailing) { + struct sr6_tlv *tlv; + unsigned int tlv_len; + + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + + trailing -= tlv_len; + if (trailing < 0) + return false; + + tlv_offset += tlv_len; + } + + return true; +} + +static struct genl_family seg6_genl_family; + +static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { + [SEG6_ATTR_DST] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, }, + [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, }, + [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, }, + [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, }, + [SEG6_ATTR_ALGID] = { .type = NLA_U8, }, + [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, }, +}; + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct seg6_pernet_data *sdata; + struct seg6_hmac_info *hinfo; + u32 hmackeyid; + char *secret; + int err = 0; + u8 algid; + u8 slen; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_HMACKEYID] || + !info->attrs[SEG6_ATTR_SECRETLEN] || + !info->attrs[SEG6_ATTR_ALGID]) + return -EINVAL; + + hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]); + slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]); + algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]); + + if (hmackeyid == 0) + return -EINVAL; + + if (slen > SEG6_HMAC_SECRET_LEN) + return -EINVAL; + + mutex_lock(&sdata->lock); + hinfo = seg6_hmac_info_lookup(net, hmackeyid); + + if (!slen) { + if (!hinfo) + err = -ENOENT; + + err = seg6_hmac_info_del(net, hmackeyid); + + goto out_unlock; + } + + if (!info->attrs[SEG6_ATTR_SECRET]) { + err = -EINVAL; + goto out_unlock; + } + + if (hinfo) { + err = seg6_hmac_info_del(net, hmackeyid); + if (err) + goto out_unlock; + } + + secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]); + + hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL); + if (!hinfo) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(hinfo->secret, secret, slen); + hinfo->slen = slen; + hinfo->alg_id = algid; + hinfo->hmackeyid = hmackeyid; + + err = seg6_hmac_info_add(net, hmackeyid, hinfo); + if (err) + kfree(hinfo); + +out_unlock: + mutex_unlock(&sdata->lock); + return err; +} + +#else + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + return -ENOTSUPP; +} + +#endif + +static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *val, *t_old, *t_new; + struct seg6_pernet_data *sdata; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_DST]) + return -EINVAL; + + val = nla_data(info->attrs[SEG6_ATTR_DST]); + t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + + mutex_lock(&sdata->lock); + + t_old = sdata->tun_src; + rcu_assign_pointer(sdata->tun_src, t_new); + + mutex_unlock(&sdata->lock); + + synchronize_net(); + kfree(t_old); + + return 0; +} + +static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *tun_src; + struct sk_buff *msg; + void *hdr; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC); + if (!hdr) + goto free_msg; + + rcu_read_lock(); + tun_src = rcu_dereference(seg6_pernet(net)->tun_src); + + if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src)) + goto nla_put_failure; + + rcu_read_unlock(); + + genlmsg_end(msg, hdr); + genlmsg_reply(msg, info); + + return 0; + +nla_put_failure: + rcu_read_unlock(); + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -ENOMEM; +} + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo, + struct sk_buff *msg) +{ + if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) || + nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) || + nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) || + nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id)) + return -1; + + return 0; +} + +static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (__seg6_hmac_fill_info(hinfo, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int seg6_genl_dumphmac_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct seg6_pernet_data *sdata; + struct rhashtable_iter *iter; + + sdata = seg6_pernet(net); + iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&sdata->hmac_infos, iter); + + return 0; +} + +static int seg6_genl_dumphmac_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + + kfree(iter); + + return 0; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + struct net *net = sock_net(skb->sk); + struct seg6_pernet_data *sdata; + struct seg6_hmac_info *hinfo; + int ret; + + sdata = seg6_pernet(net); + + ret = rhashtable_walk_start(iter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + hinfo = rhashtable_walk_next(iter); + + if (IS_ERR(hinfo)) { + if (PTR_ERR(hinfo) == -EAGAIN) + continue; + ret = PTR_ERR(hinfo); + goto done; + } else if (!hinfo) { + break; + } + + ret = __seg6_genl_dumphmac_element(hinfo, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, SEG6_CMD_DUMPHMAC); + if (ret) + goto done; + } + + ret = skb->len; + +done: + rhashtable_walk_stop(iter); + return ret; +} + +#else + +static int seg6_genl_dumphmac_start(struct netlink_callback *cb) +{ + return 0; +} + +static int seg6_genl_dumphmac_done(struct netlink_callback *cb) +{ + return 0; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +#endif + +static int __net_init seg6_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata; + + sdata = kzalloc(sizeof(*sdata), GFP_KERNEL); + if (!sdata) + return -ENOMEM; + + mutex_init(&sdata->lock); + + sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL); + if (!sdata->tun_src) { + kfree(sdata); + return -ENOMEM; + } + + net->ipv6.seg6_data = sdata; + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_init(net); +#endif + + return 0; +} + +static void __net_exit seg6_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_exit(net); +#endif + + kfree(sdata->tun_src); + kfree(sdata); +} + +static struct pernet_operations ip6_segments_ops = { + .init = seg6_net_init, + .exit = seg6_net_exit, +}; + +static const struct genl_ops seg6_genl_ops[] = { + { + .cmd = SEG6_CMD_SETHMAC, + .doit = seg6_genl_sethmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_DUMPHMAC, + .start = seg6_genl_dumphmac_start, + .dumpit = seg6_genl_dumphmac, + .done = seg6_genl_dumphmac_done, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_SET_TUNSRC, + .doit = seg6_genl_set_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_GET_TUNSRC, + .doit = seg6_genl_get_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family seg6_genl_family __ro_after_init = { + .hdrsize = 0, + .name = SEG6_GENL_NAME, + .version = SEG6_GENL_VERSION, + .maxattr = SEG6_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = seg6_genl_ops, + .n_ops = ARRAY_SIZE(seg6_genl_ops), + .module = THIS_MODULE, +}; + +int __init seg6_init(void) +{ + int err = -ENOMEM; + + err = genl_register_family(&seg6_genl_family); + if (err) + goto out; + + err = register_pernet_subsys(&ip6_segments_ops); + if (err) + goto out_unregister_genl; + + err = seg6_iptunnel_init(); + if (err) + goto out_unregister_pernet; + +#ifdef CONFIG_IPV6_SEG6_HMAC + err = seg6_hmac_init(); + if (err) + goto out_unregister_iptun; +#endif + + pr_info("Segment Routing with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_SEG6_HMAC +out_unregister_iptun: + seg6_iptunnel_exit(); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); +out_unregister_genl: + genl_unregister_family(&seg6_genl_family); + goto out; +} + +void seg6_exit(void) +{ +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_exit(); +#endif + seg6_iptunnel_exit(); + unregister_pernet_subsys(&ip6_segments_ops); + genl_unregister_family(&seg6_genl_family); +} diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c new file mode 100644 index 000000000000..ef1c8a46e7ac --- /dev/null +++ b/net/ipv6/seg6_hmac.c @@ -0,0 +1,484 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <linux/mroute6.h> +#include <linux/slab.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/xfrm.h> + +#include <linux/cryptohash.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <net/seg6.h> +#include <net/genetlink.h> +#include <net/seg6_hmac.h> +#include <linux/random.h> + +static char * __percpu *hmac_ring; + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = { + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); + + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = *this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* cleanup flag */ + *off++ = !!(sr_has_cleanup(hdr)) << 7; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. + * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_ring(void) +{ + int i; + + hmac_ring = alloc_percpu(char *); + + if (!hmac_ring) + return -ENOMEM; + + for_each_possible_cpu(i) { + char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL); + + if (!ring) + return -ENOMEM; + + *per_cpu_ptr(hmac_ring, i) = ring; + } + + return 0; +} + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = this_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc(shsize, GFP_KERNEL); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init seg6_hmac_init(void) +{ + int ret; + + ret = seg6_hmac_init_ring(); + if (ret < 0) + goto out; + + ret = seg6_hmac_init_algo(); + +out: + return ret; +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + for_each_possible_cpu(i) { + char *ring = *per_cpu_ptr(hmac_ring, i); + + kfree(ring); + } + free_percpu(hmac_ring); + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..bbfca22c34ae --- /dev/null +++ b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,431 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_iptunnel.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#ifdef CONFIG_DST_CACHE +#include <net/dst_cache.h> +#endif +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + struct net *net = dev_net(skb_dst(skb)->dev); + + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + dst = dst_cache_get(&slwt->cache); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state *newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b1cdf8009d29..dc7a3449ffc1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1318,23 +1318,11 @@ done: return err; } -static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - int t_hlen = tunnel->hlen + sizeof(struct iphdr); - - if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, - .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, }; @@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = 0xFFF8 - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..97830a6a9cbb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,6 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b9f1fee9a886..28ec0a2e7b72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -828,6 +829,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b2ef061e6836..86a8cacd333b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -302,7 +302,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); * Does increment socket refcount. */ #if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ - IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \ + IS_ENABLED(CONFIG_NF_SOCKET_IPV6) struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { @@ -334,7 +335,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; int is_udp4; - bool slow; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -344,8 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -378,7 +377,6 @@ try_again: goto csum_copy_err; } if (unlikely(err)) { - trace_kfree_skb(skb, udpv6_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); if (is_udp4) @@ -388,7 +386,7 @@ try_again: UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - skb_free_datagram_locked(sk, skb); + kfree_skb(skb); return err; } if (!peeked) { @@ -427,7 +425,7 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) @@ -438,12 +436,11 @@ try_again: if (flags & MSG_TRUNC) err = ulen; - __skb_free_datagram_locked(sk, skb, peeking ? -err : err); + skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: - slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -456,7 +453,7 @@ csum_copy_err: UDP_MIB_INERRORS, is_udplite); } } - unlock_sock_fast(sk, slow); + kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -524,7 +521,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = __sock_queue_rcv_skb(sk, skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -536,6 +533,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + return 0; } @@ -557,7 +555,6 @@ EXPORT_SYMBOL(udpv6_encap_enable); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -623,25 +620,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; udp_csum_pull_header(skb); - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - __UDP6_INC_STATS(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } skb_dst_drop(skb); - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = __udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - - return rc; + return __udpv6_queue_rcv_skb(sk, skb); csum_error: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -1156,6 +1138,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { @@ -1434,12 +1417,12 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, + .init = udp_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = __udpv6_queue_rcv_skb, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d8b7267280c3..74d09f91709e 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -51,7 +51,6 @@ static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, .ndo_set_rx_mode = irlan_eth_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -67,7 +66,8 @@ static void irlan_eth_setup(struct net_device *dev) dev->netdev_ops = &irlan_eth_netdev_ops; dev->destructor = free_netdev; - + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; /* * Lets do all queueing in IrTTP instead of this device driver. diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index e15c40e86660..7fc340e574cf 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -24,13 +24,7 @@ -static struct genl_family irda_nl_family = { - .id = GENL_ID_GENERATE, - .name = IRDA_NL_NAME, - .hdrsize = 0, - .version = IRDA_NL_VERSION, - .maxattr = IRDA_NL_CMD_MAX, -}; +static struct genl_family irda_nl_family; static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { @@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = { }; -int irda_nl_register(void) +static struct genl_family irda_nl_family __ro_after_init = { + .name = IRDA_NL_NAME, + .hdrsize = 0, + .version = IRDA_NL_VERSION, + .maxattr = IRDA_NL_CMD_MAX, + .module = THIS_MODULE, + .ops = irda_nl_ops, + .n_ops = ARRAY_SIZE(irda_nl_ops), +}; + +int __init irda_nl_register(void) { - return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops); + return genl_register_family(&irda_nl_family); } void irda_nl_unregister(void) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 02b45a8e8b35..cfb9e5f4e28f 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -453,19 +453,27 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) } } -/* Send FIN through an IUCV socket for HIPER transport */ +/* Send controlling flags through an IUCV socket for HIPER transport */ static int iucv_send_ctrl(struct sock *sk, u8 flags) { int err = 0; int blen; struct sk_buff *skb; + u8 shutdown = 0; blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + if (sk->sk_shutdown & SEND_SHUTDOWN) { + /* controlling flags should be sent anyway */ + shutdown = sk->sk_shutdown; + sk->sk_shutdown &= RCV_SHUTDOWN; + } skb = sock_alloc_send_skb(sk, blen, 1, &err); if (skb) { skb_reserve(skb, blen); err = afiucv_hs_send(NULL, sk, skb, flags); } + if (shutdown) + sk->sk_shutdown = shutdown; return err; } @@ -1315,8 +1323,13 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } IUCV_SKB_CB(skb)->offset = 0; - if (sock_queue_rcv_skb(sk, skb)) - skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); + if (sk_filter(sk, skb)) { + atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + kfree_skb(skb); + return; + } + if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */ + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); } /* iucv_process_message_q() - Process outstanding IUCV messages @@ -1430,13 +1443,13 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { IUCV_SKB_CB(rskb)->offset = 0; - if (sock_queue_rcv_skb(sk, rskb)) { + if (__sock_queue_rcv_skb(sk, rskb)) { + /* handle rcv queue full */ skb_queue_head(&iucv->backlog_skb_q, rskb); break; - } else { - rskb = skb_dequeue(&iucv->backlog_skb_q); } + rskb = skb_dequeue(&iucv->backlog_skb_q); } if (skb_queue_empty(&iucv->backlog_skb_q)) { if (!list_empty(&iucv->message_q.list)) @@ -2116,12 +2129,17 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_reset_transport_header(skb); skb_reset_network_header(skb); IUCV_SKB_CB(skb)->offset = 0; + if (sk_filter(sk, skb)) { + atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { - if (sock_queue_rcv_skb(sk, skb)) { + if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */ skb_queue_tail(&iucv->backlog_skb_q, skb); - } } else skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); spin_unlock(&iucv->message_q.lock); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a2ed3bda4ddc..85948c69b236 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_info(session, L2TP_MSG_SEQ, "%s: requested to enable seq numbers by LNS\n", session->name); - session->send_seq = -1; + session->send_seq = 1; l2tp_session_set_header_len(session, tunnel->version); } } else { diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 965f7e344cef..e2c6ae024565 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -259,6 +259,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session->mtu = dev->mtu - session->hdr_len; dev->mtu = session->mtu; dev->needed_headroom += session->hdr_len; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; priv = netdev_priv(dev); priv->dev = dev; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..1cea54feab27 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -519,6 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bf3117771822..3620fba31786 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -31,14 +31,7 @@ #include "l2tp_core.h" -static struct genl_family l2tp_nl_family = { - .id = GENL_ID_GENERATE, - .name = L2TP_GENL_NAME, - .version = L2TP_GENL_VERSION, - .hdrsize = 0, - .maxattr = L2TP_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { { @@ -227,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); if (info->attrs[L2TP_ATTR_UDP_DPORT]) cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); - if (info->attrs[L2TP_ATTR_UDP_CSUM]) - cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + cfg.use_udp_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_CSUM]); #if IS_ENABLED(CONFIG_IPV6) - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) - cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) - cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); + cfg.udp6_zero_tx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + cfg.udp6_zero_rx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); #endif } @@ -386,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: + switch (sk->sk_family) { + case AF_INET: + if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (udp_get_no_check6_tx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) + goto nla_put_failure; + if (udp_get_no_check6_rx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) + goto nla_put_failure; + break; +#endif + } if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || - nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: @@ -977,6 +985,19 @@ static const struct genl_ops l2tp_nl_ops[] = { }, }; +static struct genl_family l2tp_nl_family __ro_after_init = { + .name = L2TP_GENL_NAME, + .version = L2TP_GENL_VERSION, + .hdrsize = 0, + .maxattr = L2TP_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = l2tp_nl_ops, + .n_ops = ARRAY_SIZE(l2tp_nl_ops), + .mcgrps = l2tp_multicast_group, + .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), +}; + int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) { int ret; @@ -1010,12 +1031,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type) } EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); -static int l2tp_nl_init(void) +static int __init l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops_groups(&l2tp_nl_family, - l2tp_nl_ops, - l2tp_multicast_group); + return genl_register_family(&l2tp_nl_family); } static void l2tp_nl_cleanup(void) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 41d47bfda15c..2ddfec1e4acf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1272,7 +1272,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->recv_seq = val ? -1 : 0; + session->recv_seq = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set recv_seq=%d\n", session->name, session->recv_seq); @@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->send_seq = val ? -1 : 0; + session->send_seq = !!val; { struct sock *ssk = ps->sock; struct pppox_sock *po = pppox_sk(ssk); @@ -1301,7 +1301,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->lns_mode = val ? -1 : 0; + session->lns_mode = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set lns_mode=%d\n", session->name, session->lns_mode); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index db916cf51ffe..5e9296382420 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -532,12 +532,12 @@ out: static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc = 0; + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) + if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -547,39 +547,39 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) + if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeout; } static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct llc_sock *llc = llc_sk(sk); int rc; + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || (!llc_data_accept_state(llc->state) && !llc->remote_busy_flag && - !llc->p_flag))) + !llc->p_flag), &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index f9137a8341f4..0b202b343fd4 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -19,6 +19,7 @@ mac80211-y := \ aes_gcm.o \ aes_cmac.o \ aes_gmac.o \ + fils_aead.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index bdf0790d89cc..d0bd5fff5f0a 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -23,7 +23,7 @@ #define AAD_LEN 20 -static void gf_mulx(u8 *pad) +void gf_mulx(u8 *pad) { int i, carry; @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } -static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac, - size_t mac_len) +void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 3702041f44fd..c827e1d5de8b 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,6 +11,10 @@ #include <linux/crypto.h> +void gf_mulx(u8 *pad); +void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len); struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], size_t key_len); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f6749dced021..3b5fd4188f2a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { - tid_agg_rx = rcu_dereference_protected( - sta->ampdu_mlme.tid_rx[tid], - lockdep_is_held(&sta->ampdu_mlme.mtx)); - - if (tid_agg_rx->dialog_token == dialog_token) { + if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); @@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } /* update data */ - tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; tid_agg_rx->buf_size = buf_size; @@ -418,6 +413,7 @@ end: if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } mutex_unlock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd6541f3ade3..e91e503bf992 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -357,10 +357,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, mutex_lock(&local->sta_mtx); if (mac_addr) { - if (ieee80211_vif_is_mesh(&sdata->vif)) - sta = sta_info_get(sdata, mac_addr); - else - sta = sta_info_get_bss(sdata, mac_addr); + sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. When wpa_supplicant has roamed @@ -867,6 +864,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, } sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->vif.bss_conf.beacon_int = params->beacon_interval; + mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); @@ -897,7 +896,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, vlan->vif.type); } - sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; @@ -1523,9 +1521,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, goto out_err; if (params->vlan && params->vlan != sta->sdata->dev) { - bool prev_4addr = false; - bool new_4addr = false; - vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { @@ -1535,26 +1530,21 @@ static int ieee80211_change_station(struct wiphy *wiphy, } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); - new_4addr = true; __ieee80211_check_fast_rx_iface(vlansdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - prev_4addr = true; - } + + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (sta->sta_state == IEEE80211_STA_AUTHORIZED && - prev_4addr != new_4addr) { - if (new_4addr) - atomic_dec(&sta->sdata->bss->num_mcast_sta); - else - atomic_inc(&sta->sdata->bss->num_mcast_sta); - } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ieee80211_vif_inc_num_mcast(sta->sdata); ieee80211_send_layer2_update(sta); } @@ -2480,13 +2470,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, smps_mode == IEEE80211_SMPS_AUTOMATIC) return 0; - /* If no associated stations, there's no need to do anything */ - if (!atomic_read(&sdata->u.ap.num_mcast_sta)) { - sdata->smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps); - return 0; - } - ht_dbg(sdata, "SMPS %d requested in AP mode, sending Action frame to %d stations\n", smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta)); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f56e2f487d09..e02ba42ca827 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -210,6 +210,7 @@ static const char *hw_flag_names[] = { FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), FLAG(REPORTS_LOW_ACK), + FLAG(SUPPORTS_TX_FRAG), #undef FLAG }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index bcec1240f41d..1a05f85cb1f0 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -477,6 +477,7 @@ IEEE80211_IF_FILE_RW(tdls_wider_bw); IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC); IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC); +IEEE80211_IF_FILE(num_mcast_sta_vlan, u.vlan.num_mcast_sta, ATOMIC); static ssize_t ieee80211_if_fmt_num_buffered_multicast( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -684,6 +685,13 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } +static void add_vlan_files(struct ieee80211_sub_if_data *sdata) +{ + /* add num_mcast_sta_vlan using name num_mcast_sta */ + debugfs_create_file("num_mcast_sta", 0400, sdata->vif.debugfs_dir, + sdata, &num_mcast_sta_vlan_ops); +} + static void add_ibss_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); @@ -787,6 +795,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_AP: add_ap_files(sdata); break; + case NL80211_IFTYPE_AP_VLAN: + add_vlan_files(sdata); + break; case NL80211_IFTYPE_WDS: add_wds_files(sdata); break; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a2fcdb47a0e6..f6003b8c2c33 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -199,13 +199,18 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + bool tid_rx_valid; + tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); + p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", + tid_rx_valid); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - tid_rx ? tid_rx->dialog_token : 0); + tid_rx_valid ? + sta->ampdu_mlme.tid_rx_token[i] : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c new file mode 100644 index 000000000000..ecfdd97758a3 --- /dev/null +++ b/net/mac80211/fils_aead.c @@ -0,0 +1,342 @@ +/* + * FILS AEAD for (Re)Association Request/Response frames + * Copyright 2016, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/skcipher.h> + +#include "ieee80211_i.h" +#include "aes_cmac.h" +#include "fils_aead.h" + +static int aes_s2v(struct crypto_cipher *tfm, + size_t num_elem, const u8 *addr[], size_t len[], u8 *v) +{ + u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE]; + size_t i; + const u8 *data[2]; + size_t data_len[2], data_elems; + + /* D = AES-CMAC(K, <zero>) */ + memset(tmp, 0, AES_BLOCK_SIZE); + data[0] = tmp; + data_len[0] = AES_BLOCK_SIZE; + aes_cmac_vector(tfm, 1, data, data_len, d, AES_BLOCK_SIZE); + + for (i = 0; i < num_elem - 1; i++) { + /* D = dbl(D) xor AES_CMAC(K, Si) */ + gf_mulx(d); /* dbl */ + aes_cmac_vector(tfm, 1, &addr[i], &len[i], tmp, + AES_BLOCK_SIZE); + crypto_xor(d, tmp, AES_BLOCK_SIZE); + } + + if (len[i] >= AES_BLOCK_SIZE) { + /* len(Sn) >= 128 */ + size_t j; + const u8 *pos; + + /* T = Sn xorend D */ + + /* Use a temporary buffer to perform xorend on Sn (addr[i]) to + * avoid modifying the const input argument. + */ + data[0] = addr[i]; + data_len[0] = len[i] - AES_BLOCK_SIZE; + pos = addr[i] + data_len[0]; + for (j = 0; j < AES_BLOCK_SIZE; j++) + tmp[j] = pos[j] ^ d[j]; + data[1] = tmp; + data_len[1] = AES_BLOCK_SIZE; + data_elems = 2; + } else { + /* len(Sn) < 128 */ + /* T = dbl(D) xor pad(Sn) */ + gf_mulx(d); /* dbl */ + memset(tmp, 0, AES_BLOCK_SIZE); + memcpy(tmp, addr[i], len[i]); + tmp[len[i]] = 0x80; + crypto_xor(d, tmp, AES_BLOCK_SIZE); + data[0] = d; + data_len[0] = sizeof(d); + data_elems = 1; + } + /* V = AES-CMAC(K, T) */ + aes_cmac_vector(tfm, data_elems, data, data_len, v, AES_BLOCK_SIZE); + + return 0; +} + +/* Note: addr[] and len[] needs to have one extra slot at the end. */ +static int aes_siv_encrypt(const u8 *key, size_t key_len, + const u8 *plain, size_t plain_len, + size_t num_elem, const u8 *addr[], + size_t len[], u8 *out) +{ + u8 v[AES_BLOCK_SIZE]; + struct crypto_cipher *tfm; + struct crypto_skcipher *tfm2; + struct skcipher_request *req; + int res; + struct scatterlist src[1], dst[1]; + u8 *tmp; + + key_len /= 2; /* S2V key || CTR key */ + + addr[num_elem] = plain; + len[num_elem] = plain_len; + num_elem++; + + /* S2V */ + + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + /* K1 for S2V */ + res = crypto_cipher_setkey(tfm, key, key_len); + if (!res) + res = aes_s2v(tfm, num_elem, addr, len, v); + crypto_free_cipher(tfm); + if (res) + return res; + + /* Use a temporary buffer of the plaintext to handle need for + * overwriting this during AES-CTR. + */ + tmp = kmemdup(plain, plain_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* IV for CTR before encrypted data */ + memcpy(out, v, AES_BLOCK_SIZE); + + /* Synthetic IV to be used as the initial counter in CTR: + * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31) + */ + v[8] &= 0x7f; + v[12] &= 0x7f; + + /* CTR */ + + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + if (IS_ERR(tfm2)) { + kfree(tmp); + return PTR_ERR(tfm2); + } + /* K2 for CTR */ + res = crypto_skcipher_setkey(tfm2, key + key_len, key_len); + if (res) + goto fail; + + req = skcipher_request_alloc(tfm2, GFP_KERNEL); + if (!req) { + res = -ENOMEM; + goto fail; + } + + sg_init_one(src, tmp, plain_len); + sg_init_one(dst, out + AES_BLOCK_SIZE, plain_len); + skcipher_request_set_crypt(req, src, dst, plain_len, v); + res = crypto_skcipher_encrypt(req); + skcipher_request_free(req); +fail: + kfree(tmp); + crypto_free_skcipher(tfm2); + return res; +} + +/* Note: addr[] and len[] needs to have one extra slot at the end. */ +static int aes_siv_decrypt(const u8 *key, size_t key_len, + const u8 *iv_crypt, size_t iv_c_len, + size_t num_elem, const u8 *addr[], size_t len[], + u8 *out) +{ + struct crypto_cipher *tfm; + struct crypto_skcipher *tfm2; + struct skcipher_request *req; + struct scatterlist src[1], dst[1]; + size_t crypt_len; + int res; + u8 frame_iv[AES_BLOCK_SIZE], iv[AES_BLOCK_SIZE]; + u8 check[AES_BLOCK_SIZE]; + + crypt_len = iv_c_len - AES_BLOCK_SIZE; + key_len /= 2; /* S2V key || CTR key */ + addr[num_elem] = out; + len[num_elem] = crypt_len; + num_elem++; + + memcpy(iv, iv_crypt, AES_BLOCK_SIZE); + memcpy(frame_iv, iv_crypt, AES_BLOCK_SIZE); + + /* Synthetic IV to be used as the initial counter in CTR: + * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31) + */ + iv[8] &= 0x7f; + iv[12] &= 0x7f; + + /* CTR */ + + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + if (IS_ERR(tfm2)) + return PTR_ERR(tfm2); + /* K2 for CTR */ + res = crypto_skcipher_setkey(tfm2, key + key_len, key_len); + if (res) { + crypto_free_skcipher(tfm2); + return res; + } + + req = skcipher_request_alloc(tfm2, GFP_KERNEL); + if (!req) { + crypto_free_skcipher(tfm2); + return -ENOMEM; + } + + sg_init_one(src, iv_crypt + AES_BLOCK_SIZE, crypt_len); + sg_init_one(dst, out, crypt_len); + skcipher_request_set_crypt(req, src, dst, crypt_len, iv); + res = crypto_skcipher_decrypt(req); + skcipher_request_free(req); + crypto_free_skcipher(tfm2); + if (res) + return res; + + /* S2V */ + + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + /* K1 for S2V */ + res = crypto_cipher_setkey(tfm, key, key_len); + if (!res) + res = aes_s2v(tfm, num_elem, addr, len, check); + crypto_free_cipher(tfm); + if (res) + return res; + if (memcmp(check, frame_iv, AES_BLOCK_SIZE) != 0) + return -EINVAL; + return 0; +} + +int fils_encrypt_assoc_req(struct sk_buff *skb, + struct ieee80211_mgd_assoc_data *assoc_data) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + u8 *capab, *ies, *encr; + const u8 *addr[5 + 1], *session; + size_t len[5 + 1]; + size_t crypt_len; + + if (ieee80211_is_reassoc_req(mgmt->frame_control)) { + capab = (u8 *)&mgmt->u.reassoc_req.capab_info; + ies = mgmt->u.reassoc_req.variable; + } else { + capab = (u8 *)&mgmt->u.assoc_req.capab_info; + ies = mgmt->u.assoc_req.variable; + } + + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, skb->data + skb->len - ies); + if (!session || session[1] != 1 + 8) + return -EINVAL; + /* encrypt after FILS Session element */ + encr = (u8 *)session + 2 + 1 + 8; + + /* AES-SIV AAD vectors */ + + /* The STA's MAC address */ + addr[0] = mgmt->sa; + len[0] = ETH_ALEN; + /* The AP's BSSID */ + addr[1] = mgmt->da; + len[1] = ETH_ALEN; + /* The STA's nonce */ + addr[2] = assoc_data->fils_nonces; + len[2] = FILS_NONCE_LEN; + /* The AP's nonce */ + addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN]; + len[3] = FILS_NONCE_LEN; + /* The (Re)Association Request frame from the Capability Information + * field to the FILS Session element (both inclusive). + */ + addr[4] = capab; + len[4] = encr - capab; + + crypt_len = skb->data + skb->len - encr; + skb_put(skb, AES_BLOCK_SIZE); + return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, + encr, crypt_len, 1, addr, len, encr); +} + +int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, + u8 *frame, size_t *frame_len, + struct ieee80211_mgd_assoc_data *assoc_data) +{ + struct ieee80211_mgmt *mgmt = (void *)frame; + u8 *capab, *ies, *encr; + const u8 *addr[5 + 1], *session; + size_t len[5 + 1]; + int res; + size_t crypt_len; + + if (*frame_len < 24 + 6) + return -EINVAL; + + capab = (u8 *)&mgmt->u.assoc_resp.capab_info; + ies = mgmt->u.assoc_resp.variable; + session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, + ies, frame + *frame_len - ies); + if (!session || session[1] != 1 + 8) { + mlme_dbg(sdata, + "No (valid) FILS Session element in (Re)Association Response frame from %pM", + mgmt->sa); + return -EINVAL; + } + /* decrypt after FILS Session element */ + encr = (u8 *)session + 2 + 1 + 8; + + /* AES-SIV AAD vectors */ + + /* The AP's BSSID */ + addr[0] = mgmt->sa; + len[0] = ETH_ALEN; + /* The STA's MAC address */ + addr[1] = mgmt->da; + len[1] = ETH_ALEN; + /* The AP's nonce */ + addr[2] = &assoc_data->fils_nonces[FILS_NONCE_LEN]; + len[2] = FILS_NONCE_LEN; + /* The STA's nonce */ + addr[3] = assoc_data->fils_nonces; + len[3] = FILS_NONCE_LEN; + /* The (Re)Association Response frame from the Capability Information + * field to the FILS Session element (both inclusive). + */ + addr[4] = capab; + len[4] = encr - capab; + + crypt_len = frame + *frame_len - encr; + if (crypt_len < AES_BLOCK_SIZE) { + mlme_dbg(sdata, + "Not enough room for AES-SIV data after FILS Session element in (Re)Association Response frame from %pM", + mgmt->sa); + return -EINVAL; + } + res = aes_siv_decrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, + encr, crypt_len, 5, addr, len, encr); + if (res != 0) { + mlme_dbg(sdata, + "AES-SIV decryption of (Re)Association Response frame from %pM failed", + mgmt->sa); + return res; + } + *frame_len -= AES_BLOCK_SIZE; + return 0; +} diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h new file mode 100644 index 000000000000..fbc65232f0b3 --- /dev/null +++ b/net/mac80211/fils_aead.h @@ -0,0 +1,19 @@ +/* + * FILS AEAD for (Re)Association Request/Response frames + * Copyright 2016, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef FILS_AEAD_H +#define FILS_AEAD_H + +int fils_encrypt_assoc_req(struct sk_buff *skb, + struct ieee80211_mgd_assoc_data *assoc_data); +int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, + u8 *frame, size_t *frame_len, + struct ieee80211_mgd_assoc_data *assoc_data); + +#endif /* FILS_AEAD_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 34c2add2c455..d37a577f63a1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -84,6 +84,8 @@ struct ieee80211_local; #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL +extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; + #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) #define IEEE80211_MAX_NAN_INSTANCE_ID 255 @@ -307,6 +309,7 @@ struct ieee80211_if_vlan { /* used for all tx if the VLAN is configured to 4-addr mode */ struct sta_info __rcu *sta; + atomic_t num_mcast_sta; /* number of stations receiving multicast */ }; struct mesh_stats { @@ -398,6 +401,10 @@ struct ieee80211_mgd_assoc_data { struct ieee80211_vht_cap ap_vht_cap; + u8 fils_nonces[2 * FILS_NONCE_LEN]; + u8 fils_kek[FILS_MAX_KEK_LEN]; + size_t fils_kek_len; + size_t ie_len; u8 ie[]; }; @@ -442,7 +449,7 @@ struct ieee80211_if_managed { struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; - u8 bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN] __aligned(2); u16 aid; @@ -1527,6 +1534,23 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) return false; } +void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata); + +/* This function returns the number of multicast stations connected to this + * interface. It returns -1 if that number is not tracked, that is for netdevs + * not in AP or AP_VLAN mode or when using 4addr. + */ +static inline int +ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + return atomic_read(&sdata->u.ap.num_mcast_sta); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) + return atomic_read(&sdata->u.vlan.num_mcast_sta); + return -1; +} + u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, unsigned int mpdu_len, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 638ec0759078..41497b670e2b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -150,15 +150,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_hw_config(local, change); } -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, bool check_dup) { @@ -1166,7 +1157,6 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, - .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, @@ -1200,7 +1190,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, - .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, @@ -1884,6 +1873,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); + /* MTU range: 256 - 2304 */ + ndev->min_mtu = 256; + ndev->max_mtu = IEEE80211_MAX_DATA_LEN; + ret = register_netdevice(ndev); if (ret) { ieee80211_if_free(ndev); @@ -2005,3 +1998,19 @@ void ieee80211_iface_exit(void) { unregister_netdevice_notifier(&mac80211_netdev_notifier); } + +void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + atomic_inc(&sdata->u.ap.num_mcast_sta); + else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + atomic_inc(&sdata->u.vlan.num_mcast_sta); +} + +void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP) + atomic_dec(&sdata->u.ap.num_mcast_sta); + else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + atomic_dec(&sdata->u.vlan.num_mcast_sta); +} diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1075ac24c8c5..1822c77f2b1c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -549,6 +549,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_MAC_ON_CREATE | NL80211_FEATURE_USERSPACE_MPM | NL80211_FEATURE_FULL_AP_CLIENT_STATE; + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -821,6 +822,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) && + !local->ops->set_frag_threshold)) + return -EINVAL; + if (WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && (!local->ops->start_nan || !local->ops->stop_nan))) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7486f2dab4ba..d157b250ff77 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,6 +30,7 @@ #include "driver-ops.h" #include "rate.h" #include "led.h" +#include "fils_aead.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) @@ -652,6 +653,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */ assoc_data->ie_len + /* extra IEs */ + (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) + 9, /* WMM */ GFP_KERNEL); if (!skb) @@ -875,6 +877,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) memcpy(pos, assoc_data->ie + offset, noffset - offset); } + if (assoc_data->fils_kek_len && + fils_encrypt_assoc_req(skb, assoc_data) < 0) { + dev_kfree_skb(skb); + return; + } + drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -2618,6 +2626,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_LEAP: case WLAN_AUTH_FT: case WLAN_AUTH_SAE: + case WLAN_AUTH_FILS_SK: + case WLAN_AUTH_FILS_SK_PFS: + case WLAN_AUTH_FILS_PK: break; case WLAN_AUTH_SHARED_KEY: if (ifmgd->auth_data->expected_transaction != 4) { @@ -3143,6 +3154,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, reassoc ? "Rea" : "A", mgmt->sa, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + if (assoc_data->fils_kek_len && + fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0) + return; + pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); @@ -3193,7 +3208,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, uapsd_queues = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) if (sdata->tx_conf[ac].uapsd) - uapsd_queues |= BIT(ac); + uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; } cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues); @@ -4479,24 +4494,36 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, case NL80211_AUTHTYPE_SAE: auth_alg = WLAN_AUTH_SAE; break; + case NL80211_AUTHTYPE_FILS_SK: + auth_alg = WLAN_AUTH_FILS_SK; + break; + case NL80211_AUTHTYPE_FILS_SK_PFS: + auth_alg = WLAN_AUTH_FILS_SK_PFS; + break; + case NL80211_AUTHTYPE_FILS_PK: + auth_alg = WLAN_AUTH_FILS_PK; + break; default: return -EOPNOTSUPP; } - auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len + + auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); if (!auth_data) return -ENOMEM; auth_data->bss = req->bss; - if (req->sae_data_len >= 4) { - __le16 *pos = (__le16 *) req->sae_data; - auth_data->sae_trans = le16_to_cpu(pos[0]); - auth_data->sae_status = le16_to_cpu(pos[1]); - memcpy(auth_data->data, req->sae_data + 4, - req->sae_data_len - 4); - auth_data->data_len += req->sae_data_len - 4; + if (req->auth_data_len >= 4) { + if (req->auth_type == NL80211_AUTHTYPE_SAE) { + __le16 *pos = (__le16 *) req->auth_data; + + auth_data->sae_trans = le16_to_cpu(pos[0]); + auth_data->sae_status = le16_to_cpu(pos[1]); + } + memcpy(auth_data->data, req->auth_data + 4, + req->auth_data_len - 4); + auth_data->data_len += req->auth_data_len - 4; } if (req->ie && req->ie_len) { @@ -4692,6 +4719,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->ie_len = req->ie_len; } + if (req->fils_kek) { + /* should already be checked in cfg80211 - so warn */ + if (WARN_ON(req->fils_kek_len > FILS_MAX_KEK_LEN)) { + err = -EINVAL; + goto err_free; + } + memcpy(assoc_data->fils_kek, req->fils_kek, + req->fils_kek_len); + assoc_data->fils_kek_len = req->fils_kek_len; + } + + if (req->fils_nonces) + memcpy(assoc_data->fils_nonces, req->fils_nonces, + 2 * FILS_NONCE_LEN); + assoc_data->bss = req->bss; if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a47bbc973f2d..eeab7250f4b9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1394,13 +1394,15 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) u8 ac = ieee802_1d_to_ac[tid & 7]; /* - * If this AC is not trigger-enabled do nothing. + * If this AC is not trigger-enabled do nothing unless the + * driver is calling us after it already checked. * * NB: This could/should check a separate bitmap of trigger- * enabled queues, but for now we only implement uAPSD w/o * TSPEC changes to the ACs, so they're always the same. */ - if (!(sta->sta.uapsd_queues & BIT(ac))) + if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) && + tid != IEEE80211_NUM_TIDS) return; /* if we are in a service period, do nothing */ @@ -2215,7 +2217,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { - if (is_multicast_ether_addr(ehdr->h_dest)) { + if (is_multicast_ether_addr(ehdr->h_dest) && + ieee80211_vif_get_num_mcast_if(sdata) != 0) { /* * send multicast frames both to higher layers in * local net stack and back to the wireless medium @@ -2224,7 +2227,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (!xmit_skb) net_info_ratelimited("%s: failed to clone multicast frame\n", dev->name); - } else { + } else if (!is_multicast_ether_addr(ehdr->h_dest)) { dsta = sta_info_get(sdata, skb->data); if (dsta) { /* diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 78e9ecbc96e6..236d47e76ced 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignore_for_tim & BIT(ac)) + if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - if (ignored_acs & BIT(ac)) + if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignored_acs & BIT(ac)) + if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); @@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { - int tid; + int tid, ac; /* * For PS-Poll, this can only happen due to a race condition @@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, */ /* This will evaluate to 1, 3, 5 or 7. */ - tid = 7 - ((ffs(~ignored_acs) - 1) << 1); + for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) + if (ignored_acs & BIT(ac)) + continue; + tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { @@ -1871,10 +1874,7 @@ int sta_info_move_state(struct sta_info *sta, if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { - if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !sta->sdata->u.vlan.sta)) - atomic_dec(&sta->sdata->bss->num_mcast_sta); + ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); @@ -1882,10 +1882,7 @@ int sta_info_move_state(struct sta_info *sta, break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state == IEEE80211_STA_ASSOC) { - if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !sta->sdata->u.vlan.sta)) - atomic_inc(&sta->sdata->bss->num_mcast_sta); + ieee80211_vif_inc_num_mcast(sta->sdata); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ed5fcb984a01..dd06ef0b8861 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -184,7 +184,6 @@ struct tid_ampdu_tx { * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). - * @dialog_token: dialog token for aggregation session * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and @@ -213,7 +212,6 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - u8 dialog_token; bool auto_seq; bool removed; }; @@ -225,6 +223,7 @@ struct tid_ampdu_rx { * to tid_tx[idx], which are protected by the sta spinlock) * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the @@ -243,6 +242,7 @@ struct sta_ampdu_mlme { struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; + u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1c56abc49627..62ccaf6f585d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -331,9 +331,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; } - } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP && - ieee80211_is_data(hdr->frame_control) && - !atomic_read(&tx->sdata->u.ap.num_mcast_sta))) { + } else if (unlikely(ieee80211_is_data(hdr->frame_control) && + ieee80211_vif_get_num_mcast_if(tx->sdata) == 0)) { /* * No associated STAs - no need to send multicast * frames. @@ -935,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (info->flags & IEEE80211_TX_CTL_DONTFRAG) return TX_CONTINUE; - if (tx->local->ops->set_frag_threshold) + if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) return TX_CONTINUE; /* @@ -2801,7 +2800,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* fast-xmit doesn't handle fragmentation at all */ if (local->hw.wiphy->frag_threshold != (u32)-1 && - !local->ops->set_frag_threshold) + !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG)) goto out; rcu_read_lock(); @@ -3060,11 +3059,12 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; - struct ethhdr amsdu_hdr; + struct ethhdr *amsdu_hdr; int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); int subframe_len = skb->len - hdr_len; void *data; - u8 *qc; + u8 *qc, *h_80211_src, *h_80211_dst; + const u8 *bssid; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) return false; @@ -3072,19 +3072,44 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr), &subframe_len)) return false; - amsdu_hdr.h_proto = cpu_to_be16(subframe_len); - memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); - memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); + data = skb_push(skb, sizeof(*amsdu_hdr)); + memmove(data, data + sizeof(*amsdu_hdr), hdr_len); + hdr = data; + amsdu_hdr = data + hdr_len; + /* h_80211_src/dst is addr* field within hdr */ + h_80211_src = data + fast_tx->sa_offs; + h_80211_dst = data + fast_tx->da_offs; + + amsdu_hdr->h_proto = cpu_to_be16(subframe_len); + ether_addr_copy(amsdu_hdr->h_source, h_80211_src); + ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst); + + /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA + * fields needs to be changed to BSSID for A-MSDU frames depending + * on FromDS/ToDS values. + */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + bssid = sdata->u.mgd.bssid; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + bssid = sdata->vif.addr; + break; + default: + bssid = NULL; + } - data = skb_push(skb, sizeof(amsdu_hdr)); - memmove(data, data + sizeof(amsdu_hdr), hdr_len); - memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + if (bssid && ieee80211_has_fromds(hdr->frame_control)) + ether_addr_copy(h_80211_src, bssid); + + if (bssid && ieee80211_has_tods(hdr->frame_control)) + ether_addr_copy(h_80211_dst, bssid); - hdr = data; qc = ieee80211_get_qos_ctl(hdr); *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 545c79a42a77..ac59fbd280df 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; enum nl80211_iftype iftype = sdata->wdev.iftype; - int num[NUM_NL80211_IFTYPES]; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; int total = 1; + struct iface_combination_params params = { + .radar_detect = radar_detect, + }; lockdep_assert_held(&local->chanctx_mtx); @@ -3322,12 +3323,19 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, !chandef->chan)) return -EINVAL; - if (chandef) - num_different_channels = 1; - if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return -EINVAL; + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) { + /* + * always passing this is harmless, since it'll be the + * same value that cfg80211 finds if it finds the same + * interface ... and that's always allowed + */ + params.new_beacon_int = sdata->vif.bss_conf.beacon_int; + } + /* Always allow software iftypes */ if (local->hw.wiphy->software_iftypes & BIT(iftype)) { if (radar_detect) @@ -3335,24 +3343,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, return 0; } - memset(num, 0, sizeof(num)); + if (chandef) + params.num_different_channels = 1; if (iftype != NL80211_IFTYPE_UNSPECIFIED) - num[iftype] = 1; + params.iftype_num[iftype] = 1; list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { - num_different_channels++; + params.num_different_channels++; continue; } if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && cfg80211_chandef_compatible(chandef, &ctx->conf.def)) continue; - num_different_channels++; + params.num_different_channels++; } list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { @@ -3365,16 +3375,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) continue; - num[wdev_iter->iftype]++; + params.iftype_num[wdev_iter->iftype]++; total++; } - if (total == 1 && !radar_detect) + if (total == 1 && !params.radar_detect) return 0; - return cfg80211_check_combinations(local->hw.wiphy, - num_different_channels, - radar_detect, num); + return cfg80211_check_combinations(local->hw.wiphy, ¶ms); } static void @@ -3390,12 +3398,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, int ieee80211_max_num_channels(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int num[NUM_NL80211_IFTYPES] = {}; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; - u8 radar_detect = 0; u32 max_num_different_channels = 1; int err; + struct iface_combination_params params = {0}; lockdep_assert_held(&local->chanctx_mtx); @@ -3403,17 +3409,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - num_different_channels++; + params.num_different_channels++; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); } list_for_each_entry_rcu(sdata, &local->interfaces, list) - num[sdata->wdev.iftype]++; + params.iftype_num[sdata->wdev.iftype]++; - err = cfg80211_iter_combinations(local->hw.wiphy, - num_different_channels, radar_detect, - num, ieee80211_iter_max_chans, + err = cfg80211_iter_combinations(local->hw.wiphy, ¶ms, + ieee80211_iter_max_chans, &max_num_different_channels); if (err < 0) return err; @@ -3456,3 +3462,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); + +const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = { + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO, + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BK +}; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 9eb0aee9105b..3e3d3014e9ab 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -236,26 +236,35 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, { struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + u8 flags; u8 *p; - u8 ack_policy, tid; if (!ieee80211_is_data_qos(hdr->frame_control)) return; p = ieee80211_get_qos_ctl(hdr); - tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - /* preserve EOSP bit */ - ack_policy = *p & IEEE80211_QOS_CTL_EOSP; + /* set up the first byte */ + + /* + * preserve everything but the TID and ACK policy + * (which we both write here) + */ + flags = *p & ~(IEEE80211_QOS_CTL_TID_MASK | + IEEE80211_QOS_CTL_ACK_POLICY_MASK); if (is_multicast_ether_addr(hdr->addr1) || sdata->noack_map & BIT(tid)) { - ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; + flags |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; info->flags |= IEEE80211_TX_CTL_NO_ACK; } - /* qos header is 2 bytes */ - *p++ = ack_policy | tid; + *p = flags | tid; + + /* set up the second byte */ + p++; + if (ieee80211_vif_is_mesh(&sdata->vif)) { /* preserve RSPI and Mesh PS Level bit */ *p &= ((IEEE80211_QOS_CTL_RSPI | diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 42ce9bd4426f..8af6dd388d11 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) if (info->control.hw_key && (info->flags & IEEE80211_TX_CTL_DONTFRAG || - tx->local->ops->set_frag_threshold) && + ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cf52cf30ac4b..2f7ccd934416 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; - int tun_encap_info_len; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, @@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[MPLS_IPTUNNEL_DST]) return -EINVAL; - tun_encap_info_len = sizeof(*tun_encap_info); - newts = lwtunnel_state_alloc(tun_encap_info_len); + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); if (!newts) return -ENOMEM; - newts->len = tun_encap_info_len; tun_encap_info = mpls_lwtunnel_encap(newts); ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, &tun_encap_info->labels, tun_encap_info->label); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..44410d30d461 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -57,6 +57,10 @@ config NF_CONNTRACK config NF_LOG_COMMON tristate +config NF_LOG_NETDEV + tristate "Netdev packet logging" + select NF_LOG_COMMON + if NF_CONNTRACK config NF_CONNTRACK_MARK @@ -474,6 +478,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_RT + tristate "Netfilter nf_tables routing module" + help + This option adds the "rt" expression that you can use to match + packet routing information such as the packet nexthop. + config NFT_NUMGEN tristate "Netfilter nf_tables number generator module" help @@ -581,6 +591,19 @@ config NFT_HASH This option adds the "hash" expression that you can use to perform a hash operation on registers. +config NFT_FIB + tristate + +config NFT_FIB_INET + depends on NF_TABLES_INET + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables fib inet support" + help + This option allows using the FIB expression from the inet table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + if NF_TABLES_NETDEV config NF_DUP_NETDEV @@ -1409,9 +1432,10 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on !NF_CONNTRACK || NF_CONNTRACK depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n + depends on NF_SOCKET_IPV4 + depends on NF_SOCKET_IPV6 select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..5bbf767672ec 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -48,6 +48,9 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o +# packet logging for netdev family +obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o + obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o @@ -81,6 +84,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o @@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_FIB) += nft_fib.o +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 004af030ef1a..de30e08d58f2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -302,70 +302,40 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(_nf_unregister_hooks); -unsigned int nf_iterate(struct sk_buff *skb, - struct nf_hook_state *state, - struct nf_hook_entry **entryp) +/* Returns 1 if okfn() needs to be executed by the caller, + * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry) { unsigned int verdict; + int ret; - /* - * The caller must not block between calls to this - * function because of risk of continuing from deleted element. - */ - while (*entryp) { - if (state->thresh > (*entryp)->ops.priority) { - *entryp = rcu_dereference((*entryp)->next); - continue; - } - - /* Optimization: we don't need to hold module - reference here, since function can't sleep. --RR */ -repeat: - verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); - if (verdict != NF_ACCEPT) { -#ifdef CONFIG_NETFILTER_DEBUG - if (unlikely((verdict & NF_VERDICT_MASK) - > NF_MAX_VERDICT)) { - NFDEBUG("Evil return from %p(%u).\n", - (*entryp)->ops.hook, state->hook); - *entryp = rcu_dereference((*entryp)->next); + do { + verdict = entry->ops.hook(entry->ops.priv, skb, state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + entry = rcu_dereference(entry->next); + break; + case NF_DROP: + kfree_skb(skb); + ret = NF_DROP_GETERR(verdict); + if (ret == 0) + ret = -EPERM; + return ret; + case NF_QUEUE: + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) continue; - } -#endif - if (verdict != NF_REPEAT) - return verdict; - goto repeat; + return ret; + default: + /* Implicit handling for NF_STOLEN, as well as any other + * non conventional verdicts. + */ + return 0; } - *entryp = rcu_dereference((*entryp)->next); - } - return NF_ACCEPT; -} - + } while (entry); -/* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) -{ - struct nf_hook_entry *entry; - unsigned int verdict; - int ret = 0; - - entry = rcu_dereference(state->hook_entries); -next_hook: - verdict = nf_iterate(skb, state, &entry); - if (verdict == NF_ACCEPT || verdict == NF_STOP) { - ret = 1; - } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { - kfree_skb(skb); - ret = NF_DROP_GETERR(verdict); - if (ret == 0) - ret = -EPERM; - } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - ret = nf_queue(skb, state, &entry, verdict); - if (ret == 1 && entry) - goto next_hook; - } - return ret; + return 1; } EXPORT_SYMBOL(nf_hook_slow); diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 234a8ec82076..4083a8051f0f 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPMAC + tristate "hash:ip,mac set support" + depends on IP_SET + help + This option adds the hash:ip,mac set type support, by which + one can store IPv4/IPv6 address and MAC (ethernet address) pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_MAC tristate "hash:mac set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 3dbd5e958489..28ec148df02d 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 2e8e7e5fb4a6..6f09a99298cd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -22,6 +22,7 @@ #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_memsize IPSET_TOKEN(MTYPE, _memsize) #define mtype_flush IPSET_TOKEN(MTYPE, _flush) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) @@ -40,11 +41,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct mtype *map = set->data; - init_timer(&map->gc); - map->gc.data = (unsigned long)set; - map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&map->gc); + setup_timer(&map->gc, gc, (unsigned long)set); + mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } static void @@ -82,6 +80,16 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); + set->elements = 0; + set->ext_size = 0; +} + +/* Calculate the actual memory size of the set data */ +static size_t +mtype_memsize(const struct mtype *map, size_t dsize) +{ + return sizeof(*map) + map->memsize + + map->elements * dsize; } static int @@ -89,14 +97,15 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct mtype *map = set->data; struct nlattr *nested; - size_t memsize = sizeof(*map) + map->memsize; + size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -140,6 +149,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) { + set->elements--; ret = 0; } else if (!(flags & IPSET_FLAG_EXIST)) { set_bit(e->id, map->members); @@ -148,6 +158,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Element is re-added, cleanup extensions */ ip_set_ext_destroy(set, x); } + if (ret > 0) + set->elements--; if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT @@ -159,12 +171,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(x, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(x, set), ext); + ip_set_init_comment(set, ext_comment(x, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(x, set), ext); /* Activate element */ set_bit(e->id, map->members); + set->elements++; return 0; } @@ -181,6 +194,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; ip_set_ext_destroy(set, x); + set->elements--; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; @@ -276,6 +290,7 @@ mtype_gc(unsigned long ul_set) if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); ip_set_ext_destroy(set, x); + set->elements--; } } spin_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a748b0c2c981..23345d2d136a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); -typedef void (*destroyer)(void *); +typedef void (*destroyer)(struct ip_set *, void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { @@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); - ext->skbmark = fullmark >> 32; - ext->skbmarkmask = fullmark & 0xffffffff; + ext->skbinfo.skbmark = fullmark >> 32; + ext->skbinfo.skbmarkmask = fullmark & 0xffffffff; } if (tb[IPSET_ATTR_SKBPRIO]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbprio = be32_to_cpu(nla_get_be32( - tb[IPSET_ATTR_SKBPRIO])); + ext->skbinfo.skbprio = + be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO])); } if (tb[IPSET_ATTR_SKBQUEUE]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbqueue = be16_to_cpu(nla_get_be16( - tb[IPSET_ATTR_SKBQUEUE])); + ext->skbinfo.skbqueue = + be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE])); } return 0; } @@ -541,7 +541,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); @@ -579,7 +579,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret; BUG_ON(!set); @@ -601,7 +601,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index d32fd6b036bf..1b05d4a7d5a1 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -85,6 +85,8 @@ struct htable { }; #define hbucket(h, i) ((h)->bucket[i]) +#define ext_size(n, dsize) \ + (sizeof(struct hbucket) + (n) * (dsize)) #ifndef IPSET_NET_COUNT #define IPSET_NET_COUNT 1 @@ -150,24 +152,34 @@ htable_bits(u32 hashsize) #define INIT_CIDR(cidr, host_mask) \ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) -#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) - #ifdef IP_SET_HASH_WITH_NET0 -/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */ -#define NLEN(family) (SET_HOST_MASK(family) + 1) +/* cidr from 0 to HOST_MASK value and c = cidr + 1 */ +#define NLEN (HOST_MASK + 1) #define CIDR_POS(c) ((c) - 1) #else -/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */ -#define NLEN(family) SET_HOST_MASK(family) +/* cidr from 1 to HOST_MASK value and c = cidr + 1 */ +#define NLEN HOST_MASK #define CIDR_POS(c) ((c) - 2) #endif #else -#define NLEN(family) 0 +#define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ #endif /* _IP_SET_HASH_GEN_H */ +#ifndef MTYPE +#error "MTYPE is not defined!" +#endif + +#ifndef HTYPE +#error "HTYPE is not defined!" +#endif + +#ifndef HOST_MASK +#error "HOST_MASK is not defined!" +#endif + /* Family dependent templates */ #undef ahash_data @@ -191,7 +203,6 @@ htable_bits(u32 hashsize) #undef mtype_same_set #undef mtype_kadt #undef mtype_uadt -#undef mtype #undef mtype_add #undef mtype_del @@ -207,6 +218,7 @@ htable_bits(u32 hashsize) #undef mtype_variant #undef mtype_data_match +#undef htype #undef HKEY #define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) @@ -233,7 +245,6 @@ htable_bits(u32 hashsize) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) -#define mtype MTYPE #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) @@ -249,62 +260,54 @@ htable_bits(u32 hashsize) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) -#ifndef MTYPE -#error "MTYPE is not defined!" -#endif - -#ifndef HOST_MASK -#error "HOST_MASK is not defined!" -#endif - #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) #endif -#define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ - & jhash_mask(htable_bits)) +#define htype MTYPE -#ifndef htype -#ifndef HTYPE -#error "HTYPE is not defined!" -#endif /* HTYPE */ -#define htype HTYPE +#define HKEY(data, initval, htable_bits) \ +({ \ + const u32 *__k = (const u32 *)data; \ + u32 __l = HKEY_DATALEN / sizeof(u32); \ + \ + BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \ + \ + jhash2(__k, __l, initval) & jhash_mask(htable_bits); \ +}) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ + struct timer_list gc; /* garbage collection when timeout enabled */ u32 maxelem; /* max elements in the hash */ - u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; /* markmask value for mark mask to store */ #endif - struct timer_list gc; /* garbage collection when timeout enabled */ - struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI u8 ahash_max; /* max elements in an array block */ #endif #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif + struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS - struct net_prefixes nets[0]; /* book-keeping of prefixes */ + struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; -#endif /* htype */ #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) +mtype_add_cidr(struct htype *h, u8 cidr, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { + for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { continue; } else if (h->nets[i].cidr[n] < cidr) { @@ -323,11 +326,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) +mtype_del_cidr(struct htype *h, u8 cidr, u8 n) { - u8 i, j, net_end = nets_length - 1; + u8 i, j, net_end = NLEN - 1; - for (i = 0; i < nets_length; i++) { + for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; @@ -343,24 +346,9 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length, size_t dsize) +mtype_ahash_memsize(const struct htype *h, const struct htable *t) { - u32 i; - struct hbucket *n; - size_t memsize = sizeof(*h) + sizeof(*t); - -#ifdef IP_SET_HASH_WITH_NETS - memsize += sizeof(struct net_prefixes) * nets_length; -#endif - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = rcu_dereference_bh(hbucket(t, i)); - if (!n) - continue; - memsize += sizeof(struct hbucket) + n->size * dsize; - } - - return memsize; + return sizeof(*h) + sizeof(*t); } /* Get the ith element from the array block n */ @@ -398,9 +386,10 @@ mtype_flush(struct ip_set *set) kfree_rcu(n, rcu); } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); + memset(h->nets, 0, sizeof(h->nets)); #endif - h->elements = 0; + set->elements = 0; + set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -444,11 +433,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct htype *h = set->data; - init_timer(&h->gc); - h->gc.data = (unsigned long)set; - h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&h->gc); + setup_timer(&h->gc, gc, (unsigned long)set); + mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); pr_debug("gc initialized, run in every %u\n", IPSET_GC_PERIOD(set->timeout)); } @@ -473,12 +459,13 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Delete expired elements from the hashtable */ static void -mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h) { struct htable *t; struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif @@ -494,21 +481,20 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) continue; } data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, set))) { - pr_debug("expired %u/%u\n", i, j); - clear_bit(j, n->used); - smp_mb__after_atomic(); + if (!ip_set_timeout_expired(ext_timeout(data, set))) + continue; + pr_debug("expired %u/%u\n", i, j); + clear_bit(j, n->used); + smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS - for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, - NCIDR_PUT(DCIDR_GET(data->cidr, - k)), - nets_length, k); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, k)), + k); #endif - ip_set_ext_destroy(set, data); - h->elements--; - d++; - } + ip_set_ext_destroy(set, data); + set->elements--; + d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { @@ -532,6 +518,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) d++; } tmp->pos = d; + set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } @@ -546,7 +533,7 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); spin_lock_bh(&set->lock); - mtype_expire(set, h, NLEN(set->family), set->dsize); + mtype_expire(set, h); spin_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; @@ -563,7 +550,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t extsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -606,6 +593,7 @@ retry: /* There can't be another parallel resizing, but dumping is possible */ atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); + extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); for (i = 0; i < jhash_size(orig->htable_bits); i++) { @@ -636,6 +624,7 @@ retry: goto cleanup; } m->size = AHASH_INIT_SIZE; + extsize = ext_size(AHASH_INIT_SIZE, dsize); RCU_INIT_POINTER(hbucket(t, key), m); } else if (m->pos >= m->size) { struct hbucket *ht; @@ -655,6 +644,7 @@ retry: memcpy(ht, m, sizeof(struct hbucket) + m->size * dsize); ht->size = m->size + AHASH_INIT_SIZE; + extsize += ext_size(AHASH_INIT_SIZE, dsize); kfree(m); m = ht; RCU_INIT_POINTER(hbucket(t, key), ht); @@ -668,6 +658,7 @@ retry: } } rcu_assign_pointer(h->table, t); + set->ext_size = extsize; spin_unlock_bh(&set->lock); @@ -715,11 +706,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool deleted = false, forceadd = false, reuse = false; u32 key, multi = 0; - if (h->elements >= h->maxelem) { + if (set->elements >= h->maxelem) { if (SET_WITH_TIMEOUT(set)) /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h, NLEN(set->family), set->dsize); - if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + mtype_expire(set, h); + if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } @@ -727,20 +718,15 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = __ipset_dereference_protected(hbucket(t, key), 1); if (!n) { - if (forceadd) { - if (net_ratelimit()) - pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; - } else if (h->elements >= h->maxelem) { + if (forceadd || set->elements >= h->maxelem) goto set_full; - } old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); if (!n) return -ENOMEM; n->size = AHASH_INIT_SIZE; + set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -778,14 +764,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, for (i = 0; i < IPSET_NET_COUNT; i++) mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), - NLEN(set->family), i); + i); #endif ip_set_ext_destroy(set, data); - h->elements--; + set->elements--; } goto copy_data; } - if (h->elements >= h->maxelem) + if (set->elements >= h->maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -804,17 +790,17 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; + set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - h->elements++; + set->elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), - NLEN(set->family), i); + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: @@ -824,7 +810,7 @@ overwrite_extensions: if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(data, set), ext); + ip_set_init_comment(set, ext_comment(data, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(data, set), ext); /* Must come last for the case when timed out entry is reused */ @@ -883,11 +869,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - h->elements--; + set->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), - NLEN(set->family), j); + j); #endif ip_set_ext_destroy(set, data); @@ -896,6 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } if (n->pos == 0 && k == 0) { + set->ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -914,6 +901,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; + set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -957,14 +945,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, int i, j = 0; #endif u32 key, multi = 0; - u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { + for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); - for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; + for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi; k++) { mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), true); @@ -1021,7 +1008,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) + if (DCIDR_GET(d->cidr, i) != HOST_MASK) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); @@ -1062,7 +1049,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); + memsize = mtype_ahash_memsize(h, t) + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); @@ -1083,7 +1070,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -1238,41 +1226,35 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, struct htype *h; struct htable *t; + pr_debug("Create set %s with family %s\n", + set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); + #ifndef IP_SET_PROTO_UNDEF if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; #endif -#ifdef IP_SET_HASH_WITH_MARKMASK - markmask = 0xffffffff; -#endif -#ifdef IP_SET_HASH_WITH_NETMASK - netmask = set->family == NFPROTO_IPV4 ? 32 : 128; - pr_debug("Create set %s with family %s\n", - set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); -#endif - if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; + #ifdef IP_SET_HASH_WITH_MARKMASK /* Separated condition in order to avoid directive in argument list */ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) return -IPSET_ERR_PROTOCOL; -#endif - if (tb[IPSET_ATTR_HASHSIZE]) { - hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); - if (hashsize < IPSET_MIMINAL_HASHSIZE) - hashsize = IPSET_MIMINAL_HASHSIZE; + markmask = 0xffffffff; + if (tb[IPSET_ATTR_MARKMASK]) { + markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); + if (markmask == 0) + return -IPSET_ERR_INVALID_MARKMASK; } - - if (tb[IPSET_ATTR_MAXELEM]) - maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); +#endif #ifdef IP_SET_HASH_WITH_NETMASK + netmask = set->family == NFPROTO_IPV4 ? 32 : 128; if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); @@ -1282,33 +1264,21 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, return -IPSET_ERR_INVALID_NETMASK; } #endif -#ifdef IP_SET_HASH_WITH_MARKMASK - if (tb[IPSET_ATTR_MARKMASK]) { - markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); - if (markmask == 0) - return -IPSET_ERR_INVALID_MARKMASK; + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; } -#endif + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); hsize = sizeof(*h); -#ifdef IP_SET_HASH_WITH_NETS - hsize += sizeof(struct net_prefixes) * NLEN(set->family); -#endif h = kzalloc(hsize, GFP_KERNEL); if (!h) return -ENOMEM; - h->maxelem = maxelem; -#ifdef IP_SET_HASH_WITH_NETMASK - h->netmask = netmask; -#endif -#ifdef IP_SET_HASH_WITH_MARKMASK - h->markmask = markmask; -#endif - get_random_bytes(&h->initval, sizeof(h->initval)); - set->timeout = IPSET_NO_TIMEOUT; - hbits = htable_bits(hashsize); hsize = htable_size(hbits); if (hsize == 0) { @@ -1320,8 +1290,17 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, kfree(h); return -ENOMEM; } + h->maxelem = maxelem; +#ifdef IP_SET_HASH_WITH_NETMASK + h->netmask = netmask; +#endif +#ifdef IP_SET_HASH_WITH_MARKMASK + h->markmask = markmask; +#endif + get_random_bytes(&h->initval, sizeof(h->initval)); + t->htable_bits = hbits; - rcu_assign_pointer(h->table, t); + RCU_INIT_POINTER(h->table, t); set->data = h; #ifndef IP_SET_PROTO_UNDEF @@ -1339,6 +1318,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); } #endif + set->timeout = IPSET_NO_TIMEOUT; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); #ifndef IP_SET_PROTO_UNDEF diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 9d6bf19f7b78..20bfbd315f61 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -82,7 +82,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ip *h = set->data; + const struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -101,7 +101,7 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip *h = set->data; + const struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -199,7 +199,7 @@ nla_put_failure: } static inline void -hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e) +hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e) { } @@ -217,7 +217,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ip *h = set->data; + const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -234,7 +234,7 @@ static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip *h = set->data; + const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c new file mode 100644 index 000000000000..1ab5ed2f6839 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -0,0 +1,315 @@ +/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,mac type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>"); +IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:ip,mac"); + +/* Type specific function prefix */ +#define HTYPE hash_ipmac + +/* Zero valued element is not supported */ +static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; + +/* IPv4 variant */ + +/* Member elements */ +struct hash_ipmac4_elem { + /* Zero valued IP addresses cannot be stored */ + __be32 ip; + union { + unsigned char ether[ETH_ALEN]; + __be32 foo[2]; + }; +}; + +/* Common functions */ + +static inline bool +hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1, + const struct hash_ipmac4_elem *e2, + u32 *multi) +{ + return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether); +} + +static bool +hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e) +{ + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) || + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) + goto nla_put_failure; + return false; + +nla_put_failure: + return true; +} + +static inline void +hash_ipmac4_data_next(struct hash_ipmac4_elem *next, + const struct hash_ipmac4_elem *e) +{ + next->ip = e->ip; +} + +#define MTYPE hash_ipmac4 +#define PF 4 +#define HOST_MASK 32 +#define HKEY_DATALEN sizeof(struct hash_ipmac4_elem) +#include "ip_set_hash_gen.h" + +static int +hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + /* MAC can be src only */ + if (!(opt->flags & IPSET_DIM_TWO_SRC)) + return 0; + + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_ETHER] || + nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -IPSET_ERR_HASH_ELEM; + + return adtfn(set, &e, &ext, &ext, flags); +} + +/* IPv6 variant */ + +/* Member elements */ +struct hash_ipmac6_elem { + /* Zero valued IP addresses cannot be stored */ + union nf_inet_addr ip; + union { + unsigned char ether[ETH_ALEN]; + __be32 foo[2]; + }; +}; + +/* Common functions */ + +static inline bool +hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1, + const struct hash_ipmac6_elem *e2, + u32 *multi) +{ + return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) && + ether_addr_equal(e1->ether, e2->ether); +} + +static bool +hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e) +{ + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) + goto nla_put_failure; + return false; + +nla_put_failure: + return true; +} + +static inline void +hash_ipmac6_data_next(struct hash_ipmac6_elem *next, + const struct hash_ipmac6_elem *e) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK +#undef HKEY_DATALEN + +#define MTYPE hash_ipmac6 +#define PF 6 +#define HOST_MASK 128 +#define HKEY_DATALEN sizeof(struct hash_ipmac6_elem) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac6_elem e = { + { .all = { 0 } }, + { .foo[0] = 0, .foo[1] = 0 } + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + /* MAC can be src only */ + if (!(opt->flags & IPSET_DIM_TWO_SRC)) + return 0; + + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmac6_elem e = { + { .all = { 0 } }, + { .foo[0] = 0, .foo[1] = 0 } + }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_ETHER] || + nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + if (ether_addr_equal(e.ether, invalid_ether)) + return -IPSET_ERR_HASH_ELEM; + + return adtfn(set, &e, &ext, &ext, flags); +} + +static struct ip_set_type hash_ipmac_type __read_mostly = { + .name = "hash:ip,mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_ipmac_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipmac_init(void) +{ + return ip_set_type_register(&hash_ipmac_type); +} + +static void __exit +hash_ipmac_fini(void) +{ + ip_set_type_unregister(&hash_ipmac_type); +} + +module_init(hash_ipmac_init); +module_exit(hash_ipmac_fini); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index a0695a2ab585..b64cf14e8352 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -85,7 +85,7 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -101,7 +101,7 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -193,7 +193,7 @@ nla_put_failure: } static inline void -hash_ipmark6_data_next(struct hash_ipmark4_elem *next, +hash_ipmark6_data_next(struct hash_ipmark6_elem *next, const struct hash_ipmark6_elem *d) { } @@ -211,7 +211,7 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -227,7 +227,7 @@ static int hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark *h = set->data; + const struct hash_ipmark6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9d84b3dff603..f438740e6c6a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -108,7 +108,7 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport *h = set->data; + const struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -231,7 +231,7 @@ nla_put_failure: } static inline void -hash_ipport6_data_next(struct hash_ipport4_elem *next, +hash_ipport6_data_next(struct hash_ipport6_elem *next, const struct hash_ipport6_elem *d) { next->port = d->port; @@ -266,7 +266,7 @@ static int hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport *h = set->data; + const struct hash_ipport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 215b7b942038..6215fb898c50 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -111,7 +111,7 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip *h = set->data; + const struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -241,7 +241,7 @@ nla_put_failure: } static inline void -hash_ipportip6_data_next(struct hash_ipportip4_elem *next, +hash_ipportip6_data_next(struct hash_ipportip6_elem *next, const struct hash_ipportip6_elem *d) { next->port = d->port; @@ -277,7 +277,7 @@ static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip *h = set->data; + const struct hash_ipportip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 9ca719625ea3..5ab1b99a53c2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -138,7 +138,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -163,7 +163,7 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -370,7 +370,7 @@ nla_put_failure: } static inline void -hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, +hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next, const struct hash_ipportnet6_elem *d) { next->port = d->port; @@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -414,7 +414,7 @@ static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet *h = set->data; + const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 3e4bffdc1cc0..5d9e895452e7 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -117,7 +117,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_net *h = set->data; + const struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -139,7 +139,7 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; + const struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -268,7 +268,7 @@ nla_put_failure: } static inline void -hash_net6_data_next(struct hash_net4_elem *next, +hash_net6_data_next(struct hash_net6_elem *next, const struct hash_net6_elem *d) { } @@ -286,7 +286,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_net *h = set->data; + const struct hash_net6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f0f688db6213..44cf11939c91 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -156,7 +156,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct hash_netiface *h = set->data; + struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); -#define IFACE(dir) (par->dir ? par->dir->name : "") +#define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { @@ -196,7 +196,7 @@ static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct hash_netiface *h = set->data; + struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -348,7 +348,7 @@ nla_put_failure: } static inline void -hash_netiface6_data_next(struct hash_netiface4_elem *next, +hash_netiface6_data_next(struct hash_netiface6_elem *next, const struct hash_netiface6_elem *d) { } @@ -367,7 +367,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct hash_netiface *h = set->data; + struct hash_netiface6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index a93dfebffa81..db614e13b193 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -143,7 +143,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netnet *h = set->data; + const struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -165,7 +165,7 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet *h = set->data; + const struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -352,7 +352,7 @@ nla_put_failure: } static inline void -hash_netnet6_data_next(struct hash_netnet4_elem *next, +hash_netnet6_data_next(struct hash_netnet6_elem *next, const struct hash_netnet6_elem *d) { } @@ -377,7 +377,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netnet *h = set->data; + const struct hash_netnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 731813e0f08c..54b64b6cd0cd 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -133,7 +133,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netport *h = set->data; + const struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -157,7 +157,7 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport *h = set->data; + const struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -329,7 +329,7 @@ nla_put_failure: } static inline void -hash_netport6_data_next(struct hash_netport4_elem *next, +hash_netport6_data_next(struct hash_netport6_elem *next, const struct hash_netport6_elem *d) { next->port = d->port; @@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netport *h = set->data; + const struct hash_netport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), @@ -372,7 +372,7 @@ static int hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport *h = set->data; + const struct hash_netport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 9a14c237830f..aff846960ac4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -154,7 +154,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -180,7 +180,7 @@ static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -406,7 +406,7 @@ nla_put_failure: } static inline void -hash_netportnet6_data_next(struct hash_netportnet4_elem *next, +hash_netportnet6_data_next(struct hash_netportnet6_elem *next, const struct hash_netportnet6_elem *d) { next->port = d->port; @@ -432,7 +432,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -458,7 +458,7 @@ static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet *h = set->data; + const struct hash_netportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a2a89e4e0a14..51077c53d76b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + set->elements--; list_del_rcu(&e->list); call_rcu(&e->rcu, __list_set_del_rcu); } @@ -227,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); + ip_set_init_comment(set, ext_comment(e, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Update timeout last */ @@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, list_add_rcu(&e->list, &prev->list); else list_add_tail_rcu(&e->list, &map->members); + set->elements++; return 0; } @@ -419,6 +421,8 @@ list_set_flush(struct ip_set *set) list_for_each_entry_safe(e, n, &map->members, list) list_set_del(set, e); + set->elements = 0; + set->ext_size = 0; } static void @@ -441,12 +445,12 @@ list_set_destroy(struct ip_set *set) set->data = NULL; } -static int -list_set_head(struct ip_set *set, struct sk_buff *skb) +/* Calculate the actual memory size of the set data */ +static size_t +list_set_memsize(const struct list_set *map, size_t dsize) { - const struct list_set *map = set->data; - struct nlattr *nested; struct set_elem *e; + size_t memsize; u32 n = 0; rcu_read_lock(); @@ -454,13 +458,25 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) n++; rcu_read_unlock(); + memsize = sizeof(*map) + n * dsize; + + return memsize; +} + +static int +list_set_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct list_set *map = set->data; + struct nlattr *nested; + size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + n * set->dsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -570,11 +586,8 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) { struct list_set *map = set->data; - init_timer(&map->gc); - map->gc.data = (unsigned long)set; - map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&map->gc); + setup_timer(&map->gc, gc, (unsigned long)set); + mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } /* Create list:set type of sets */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a6e44ef2ec9a..038c2ba0ae0f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { */ /* IPVS genetlink family */ -static struct genl_family ip_vs_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = IPVS_GENL_NAME, - .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_ATTR_MAX, - .netnsok = true, /* Make ipvsadm to work on netns */ -}; +static struct genl_family ip_vs_genl_family; /* Policy used for first-level command attributes */ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { @@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; +static struct genl_family ip_vs_genl_family __ro_after_init = { + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_ATTR_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ + .module = THIS_MODULE, + .ops = ip_vs_genl_ops, + .n_ops = ARRAY_SIZE(ip_vs_genl_ops), +}; + static int __init ip_vs_genl_register(void) { - return genl_register_family_with_ops(&ip_vs_genl_family, - ip_vs_genl_ops); + return genl_register_family(&ip_vs_genl_family); } static void ip_vs_genl_unregister(void) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0f87e5d21be7..6a0bbfa8e702 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1338,7 +1338,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (skb->nfct) goto out; } - +repeat: ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { @@ -1370,6 +1370,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); + /* Special case: TCP tracker reports an attempt to reopen a + * closed/aborted connection. We have to go back and create a + * fresh conntrack. + */ + if (ret == -NF_REPEAT) + goto repeat; ret = -ret; goto out; } @@ -1377,15 +1383,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: - if (tmpl) { - /* Special case: we have to repeat this hook, assign the - * template again to this packet. We assume that this packet - * has no conntrack assigned. This is used by nf_ct_tcp. */ - if (ret == NF_REPEAT) - skb->nfct = (struct nf_conntrack *)tmpl; - else - nf_ct_put(tmpl); - } + if (tmpl) + nf_ct_put(tmpl); return ret; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8d2c7d8c666a..9bd34647225a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -281,15 +281,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; if (l4proto->l3proto >= PF_MAX) return -EBUSY; - if ((l4proto->to_nlattr && !l4proto->nlattr_size) - || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) + if ((l4proto->to_nlattr && !l4proto->nlattr_size) || + (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) return -EINVAL; mutex_lock(&nf_ct_proto_mutex); @@ -307,7 +307,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) } for (i = 0; i < MAX_NF_CT_PROTO; i++) - RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); + RCU_INIT_POINTER(proto_array[i], + &nf_conntrack_l4proto_generic); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. @@ -335,10 +336,10 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); -int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -361,9 +362,9 @@ int nf_ct_l4proto_pernet_register(struct net *net, out: return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -378,10 +379,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); -void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; @@ -395,6 +396,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); + +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL, ver; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_register_one(l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4; + pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n", + ver, l4proto[i]->name, ver); + nf_ct_l4proto_unregister(l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); + +int nf_ct_l4proto_pernet_register(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + pr_err("nf_conntrack_%s%d: pernet registration failed\n", + l4proto[i]->name, + l4proto[i]->l3proto == PF_INET6 ? 6 : 4); + nf_ct_l4proto_pernet_unregister(net, l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); + +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_unregister_one(l4proto[num_proto]); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); + +void nf_ct_l4proto_pernet_unregister(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); +} EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); int nf_conntrack_proto_pernet_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a45bee52dccc..ac8976964975 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -936,30 +936,21 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .init_net = dccp_init_net, }; +static struct nf_conntrack_l4proto *dccp_proto[] = { + &dccp_proto4, + &dccp_proto6, +}; + static __net_init int dccp_net_init(struct net *net) { - int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); - if (ret < 0) { - pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); - if (ret < 0) { - pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); - goto cleanup_dccp4; - } - return 0; -cleanup_dccp4: - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, dccp_proto, + ARRAY_SIZE(dccp_proto)); } static __net_exit void dccp_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, dccp_proto, + ARRAY_SIZE(dccp_proto)); } static struct pernet_operations dccp_net_ops = { @@ -975,29 +966,16 @@ static int __init nf_conntrack_proto_dccp_init(void) ret = register_pernet_subsys(&dccp_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&dccp_proto4); - if (ret < 0) - goto out_dccp4; - - ret = nf_ct_l4proto_register(&dccp_proto6); + return ret; + ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto)); if (ret < 0) - goto out_dccp6; - - return 0; -out_dccp6: - nf_ct_l4proto_unregister(&dccp_proto4); -out_dccp4: - unregister_pernet_subsys(&dccp_net_ops); -out_pernet: + unregister_pernet_subsys(&dccp_net_ops); return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { - nf_ct_l4proto_unregister(&dccp_proto6); - nf_ct_l4proto_unregister(&dccp_proto4); + nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto)); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9a715f88b2f1..ff405c9183f1 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); + + ret = nf_ct_l4proto_pernet_register_one(net, + &nf_conntrack_l4proto_gre4); if (ret < 0) pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; @@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net) static void proto_gre_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void) ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); if (ret < 0) goto out_gre4; @@ -436,7 +437,7 @@ out_pernet: static void __exit nf_ct_proto_gre_fini(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 982ea62606c7..17c0ade23fd8 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -816,32 +816,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .init_net = sctp_init_net, }; +static struct nf_conntrack_l4proto *sctp_proto[] = { + &nf_conntrack_l4proto_sctp4, + &nf_conntrack_l4proto_sctp6, +}; + static int sctp_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); - if (ret < 0) { - pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); - if (ret < 0) { - pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); - goto cleanup_sctp4; - } - return 0; - -cleanup_sctp4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, sctp_proto, + ARRAY_SIZE(sctp_proto)); } static void sctp_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, sctp_proto, + ARRAY_SIZE(sctp_proto)); } static struct pernet_operations sctp_net_ops = { @@ -857,29 +846,16 @@ static int __init nf_conntrack_proto_sctp_init(void) ret = register_pernet_subsys(&sctp_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); - if (ret < 0) - goto out_sctp4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + return ret; + ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto)); if (ret < 0) - goto out_sctp6; - - return 0; -out_sctp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); -out_sctp4: - unregister_pernet_subsys(&sctp_net_ops); -out_pernet: + unregister_pernet_subsys(&sctp_net_ops); return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto)); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 029206e8dec4..8cdb4b1bf933 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -336,32 +336,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .init_net = udplite_init_net, }; +static struct nf_conntrack_l4proto *udplite_proto[] = { + &nf_conntrack_l4proto_udplite4, + &nf_conntrack_l4proto_udplite6, +}; + static int udplite_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); - if (ret < 0) { - pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); - if (ret < 0) { - pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); - goto cleanup_udplite4; - } - return 0; - -cleanup_udplite4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, udplite_proto, + ARRAY_SIZE(udplite_proto)); } static void udplite_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, udplite_proto, + ARRAY_SIZE(udplite_proto)); } static struct pernet_operations udplite_net_ops = { @@ -377,29 +366,16 @@ static int __init nf_conntrack_proto_udplite_init(void) ret = register_pernet_subsys(&udplite_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); - if (ret < 0) - goto out_udplite4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + return ret; + ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto)); if (ret < 0) - goto out_udplite6; - - return 0; -out_udplite6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); -out_udplite4: - unregister_pernet_subsys(&udplite_net_ops); -out_pernet: + unregister_pernet_subsys(&udplite_net_ops); return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto)); unregister_pernet_subsys(&udplite_net_ops); } diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 7ec69723940f..44ae986c383f 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,7 +19,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) struct net_device *dev; struct sk_buff *skb; - dev = dev_get_by_index_rcu(pkt->net, oif); + dev = dev_get_by_index_rcu(nft_net(pkt), oif); if (dev == NULL) return; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 9fdb655f85bc..c46d214d5323 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -11,11 +11,6 @@ #define NFDEBUG(format, args...) #endif - -/* core.c */ -unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp); - /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry **entryp, unsigned int verdict); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 119fe1cb1ea9..ed9b80815fa0 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +/* bridge and netdev logging families share this code. */ +void nf_log_l2packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} +EXPORT_SYMBOL_GPL(nf_log_l2packet); + static int __init nf_log_common_init(void) { return 0; diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c new file mode 100644 index 000000000000..1f645949f3d8 --- /dev/null +++ b/net/netfilter/nf_log_netdev.c @@ -0,0 +1,80 @@ +/* + * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_log.h> + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_netdev_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); +} + +static void __net_exit nf_log_netdev_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_netdev_net_ops = { + .init = nf_log_netdev_net_init, + .exit = nf_log_netdev_net_exit, +}; + +static int __init nf_log_netdev_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_netdev_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + return 0; +} + +static void __exit nf_log_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_log_netdev_net_ops); + nf_log_unregister(&nf_netdev_logger); +} + +module_init(nf_log_netdev_init); +module_exit(nf_log_netdev_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter netdev packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8f08d759844a..77cba9f6ccb6 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int queuenum) + struct nf_hook_entry *hook_entry, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, + .hook = hook_entry, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry *entry = *entryp; int ret; - RCU_INIT_POINTER(state->hook_entries, entry); - ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { @@ -177,17 +177,34 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } +static unsigned int nf_iterate(struct sk_buff *skb, + struct nf_hook_state *state, + struct nf_hook_entry **entryp) +{ + unsigned int verdict; + + do { +repeat: + verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); + if (verdict != NF_ACCEPT) { + if (verdict != NF_REPEAT) + return verdict; + goto repeat; + } + *entryp = rcu_dereference((*entryp)->next); + } while (*entryp); + + return NF_ACCEPT; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry; + struct nf_hook_entry *hook_entry = entry->hook; + struct nf_hook_ops *elem = &hook_entry->ops; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; - struct nf_hook_ops *elem; int err; - hook_entry = rcu_dereference(entry->state.hook_entries); - elem = &hook_entry->ops; - nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ @@ -200,8 +217,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict = NF_DROP; } - entry->state.thresh = INT_MIN; - if (verdict == NF_ACCEPT) { hook_entry = rcu_dereference(hook_entry->next); if (hook_entry) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 0dd5c695482f..65dbeadcb118 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, nft_trace_notify(info); - nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], rulenum); } static inline void nft_trace_packet(struct nft_traceinfo *info, @@ -124,7 +124,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; - const struct net *net = pkt->net; + const struct net *net = nft_net(pkt); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; @@ -178,6 +178,7 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + case NF_STOLEN: nft_trace_packet(&info, chain, rule, rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; @@ -231,68 +232,40 @@ next_rule: } EXPORT_SYMBOL_GPL(nft_do_chain); +static struct nft_expr_type *nft_basic_types[] = { + &nft_imm_type, + &nft_cmp_type, + &nft_lookup_type, + &nft_bitwise_type, + &nft_byteorder_type, + &nft_payload_type, + &nft_dynset_type, + &nft_range_type, +}; + int __init nf_tables_core_module_init(void) { - int err; - - err = nft_immediate_module_init(); - if (err < 0) - goto err1; - - err = nft_cmp_module_init(); - if (err < 0) - goto err2; - - err = nft_lookup_module_init(); - if (err < 0) - goto err3; - - err = nft_bitwise_module_init(); - if (err < 0) - goto err4; + int err, i; - err = nft_byteorder_module_init(); - if (err < 0) - goto err5; - - err = nft_payload_module_init(); - if (err < 0) - goto err6; - - err = nft_dynset_module_init(); - if (err < 0) - goto err7; - - err = nft_range_module_init(); - if (err < 0) - goto err8; + for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) { + err = nft_register_expr(nft_basic_types[i]); + if (err) + goto err; + } return 0; -err8: - nft_dynset_module_exit(); -err7: - nft_payload_module_exit(); -err6: - nft_byteorder_module_exit(); -err5: - nft_bitwise_module_exit(); -err4: - nft_lookup_module_exit(); -err3: - nft_cmp_module_exit(); -err2: - nft_immediate_module_exit(); -err1: + +err: + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); return err; } void nf_tables_core_module_exit(void) { - nft_dynset_module_exit(); - nft_payload_module_exit(); - nft_byteorder_module_exit(); - nft_bitwise_module_exit(); - nft_lookup_module_exit(); - nft_cmp_module_exit(); - nft_immediate_module_exit(); + int i; + + i = ARRAY_SIZE(nft_basic_types); + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index ab695f8e2d29..12eb9041dca2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info) unsigned int size; int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; - if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + @@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) @@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info) goto nla_put_failure; if (!info->packet_dumped) { - if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt))) goto nla_put_failure; if (nf_trace_fill_pkt_info(skb, pkt)) @@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info) } nlmsg_end(skb, nlh); - nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index eb086a192c5a..7435505037b7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index af832c526048..1e33115b399f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -69,7 +69,7 @@ struct nfqnl_instance { * Following fields are dirtied for each queued packet, * keep them in same cache line if possible. */ - spinlock_t lock; + spinlock_t lock ____cacheline_aligned_in_smp; unsigned int queue_total; unsigned int id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ @@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = { static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return rcu_access_pointer(entry->state.hook_entries) == + return rcu_access_pointer(entry->hook) == (struct nf_hook_entry *)entry_ptr; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 31c15ed2e5fc..877d9acd91ef 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -121,7 +121,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_bitwise_type; static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), @@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = { .dump = nft_bitwise_dump, }; -static struct nft_expr_type nft_bitwise_type __read_mostly = { +struct nft_expr_type nft_bitwise_type __read_mostly = { .name = "bitwise", .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -int __init nft_bitwise_module_init(void) -{ - return nft_register_expr(&nft_bitwise_type); -} - -void nft_bitwise_module_exit(void) -{ - nft_unregister_expr(&nft_bitwise_type); -} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index ee63d981268d..13d4e421a6b3 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -169,7 +169,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_byteorder_type; static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), @@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = { .dump = nft_byteorder_dump, }; -static struct nft_expr_type nft_byteorder_type __read_mostly = { +struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, }; - -int __init nft_byteorder_module_init(void) -{ - return nft_register_expr(&nft_byteorder_type); -} - -void nft_byteorder_module_exit(void) -{ - nft_unregister_expr(&nft_byteorder_type); -} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2e53739812b1..2b96effeadc1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; @@ -110,7 +107,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_cmp_type; static const struct nft_expr_ops nft_cmp_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), @@ -211,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp_ops; } -static struct nft_expr_type nft_cmp_type __read_mostly = { +struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, }; - -int __init nft_cmp_module_init(void) -{ - return nft_register_expr(&nft_cmp_type); -} - -void nft_cmp_module_exit(void) -{ - nft_unregister_expr(&nft_cmp_type); -} diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d7b0d171172a..6837348c8993 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = { .owner = THIS_MODULE, }; +static void nft_notrack_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(pkt->skb, &ctinfo); + /* Previously seen (loopback or untracked)? Ignore. */ + if (ct) + return; + + ct = nf_ct_untracked_get(); + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; +} + +static struct nft_expr_type nft_notrack_type; +static const struct nft_expr_ops nft_notrack_ops = { + .type = &nft_notrack_type, + .size = NFT_EXPR_SIZE(0), + .eval = nft_notrack_eval, +}; + +static struct nft_expr_type nft_notrack_type __read_mostly = { + .name = "notrack", + .ops = &nft_notrack_ops, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { + int err; + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); - return nft_register_expr(&nft_ct_type); + err = nft_register_expr(&nft_ct_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_notrack_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_expr(&nft_ct_type); + return err; } static void __exit nft_ct_module_exit(void) { + nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("ct"); +MODULE_ALIAS_NFT_EXPR("notrack"); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 31ca94793aa9..7de2f46734a4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -268,7 +268,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_dynset_type; static const struct nft_expr_ops nft_dynset_ops = { .type = &nft_dynset_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), @@ -278,20 +277,10 @@ static const struct nft_expr_ops nft_dynset_ops = { .dump = nft_dynset_dump, }; -static struct nft_expr_type nft_dynset_type __read_mostly = { +struct nft_expr_type nft_dynset_type __read_mostly = { .name = "dynset", .ops = &nft_dynset_ops, .policy = nft_dynset_policy, .maxattr = NFTA_DYNSET_MAX, .owner = THIS_MODULE, }; - -int __init nft_dynset_module_init(void) -{ - return nft_register_expr(&nft_dynset_type); -} - -void nft_dynset_module_exit(void) -{ - nft_unregister_expr(&nft_dynset_type); -} diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c new file mode 100644 index 000000000000..249c9b80c150 --- /dev/null +++ b/net/netfilter/nft_fib.c @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Generic part shared by ipv4 and ipv6 backends. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { + [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_RESULT] = { .type = NLA_U32 }, + [NFTA_FIB_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL(nft_fib_policy); + +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIFNAME: + hooks = (1 << NF_INET_PRE_ROUTING); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + else if (priv->flags & NFTA_FIB_F_OIF) + hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_FORWARD); + else + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING); + + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} +EXPORT_SYMBOL_GPL(nft_fib_validate); + +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) + return -EINVAL; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); + + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL)) + return -EINVAL; + + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) + return -EINVAL; + + priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = sizeof(int); + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + len = sizeof(u32); + break; + default: + return -EINVAL; + } + + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); + if (err < 0) + return err; + + return nft_fib_validate(ctx, expr, NULL); +} +EXPORT_SYMBOL_GPL(nft_fib_init); + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) + return -1; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fib_dump); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index) +{ + struct net_device *dev; + u32 *dreg = reg; + + switch (r) { + case NFT_FIB_RESULT_OIF: + *dreg = index; + break; + case NFT_FIB_RESULT_OIFNAME: + dev = dev_get_by_index_rcu(nft_net(pkt), index); + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + *dreg = 0; + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib_store_result); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c new file mode 100644 index 000000000000..9120fc7228f4 --- /dev/null +++ b/net/netfilter/nft_fib_inet.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +#include <net/netfilter/nft_fib.h> + +static void nft_fib_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case NFPROTO_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_fib_inet_type; +static const struct nft_expr_ops nft_fib_inet_ops = { + .type = &nft_fib_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_inet_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "fib", + .ops = &nft_fib_inet_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_inet_module_init(void) +{ + return nft_register_expr(&nft_fib_inet_type); +} + +static void __exit nft_fib_inet_module_exit(void) +{ + nft_unregister_expr(&nft_fib_inet_type); +} + +module_init(nft_fib_inet_module_init); +module_exit(nft_fib_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index baf694de3935..97ad8e30e4b4 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -57,7 +57,6 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (!tb[NFTA_HASH_SREG] || !tb[NFTA_HASH_DREG] || !tb[NFTA_HASH_LEN] || - !tb[NFTA_HASH_SEED] || !tb[NFTA_HASH_MODULUS]) return -EINVAL; @@ -80,7 +79,10 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + if (tb[NFTA_HASH_SEED]) + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + else + get_random_bytes(&priv->seed, sizeof(priv->seed)); return nft_validate_register_load(priv->sreg, len) && nft_validate_register_store(ctx, priv->dreg, NULL, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index d17018ff54e6..728baf88295a 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (desc.len > U8_MAX) - return -ERANGE; - priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); @@ -105,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_type nft_imm_type; static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -116,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = { .validate = nft_immediate_validate, }; -static struct nft_expr_type nft_imm_type __read_mostly = { +struct nft_expr_type nft_imm_type __read_mostly = { .name = "immediate", .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, .owner = THIS_MODULE, }; - -int __init nft_immediate_module_init(void) -{ - return nft_register_expr(&nft_imm_type); -} - -void nft_immediate_module_exit(void) -{ - nft_unregister_expr(&nft_imm_type); -} diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 1b01404bb33f..6271e40a3dd6 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -32,8 +32,9 @@ static void nft_log_eval(const struct nft_expr *expr, { const struct nft_log *priv = nft_expr_priv(expr); - nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &priv->loginfo, "%s", priv->prefix); + nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", + priv->prefix); } static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 8166b6994cc7..d4f97fa7e21d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_set_ext *ext; bool found; - found = set->ops->lookup(pkt->net, set, ®s->data[priv->sreg], &ext) ^ - priv->invert; - + found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], + &ext) ^ priv->invert; if (!found) { regs->verdict.code = NFT_BREAK; return; @@ -155,7 +154,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_lookup_type; static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -165,20 +163,10 @@ static const struct nft_expr_ops nft_lookup_ops = { .dump = nft_lookup_dump, }; -static struct nft_expr_type nft_lookup_type __read_mostly = { +struct nft_expr_type nft_lookup_type __read_mostly = { .name = "lookup", .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, .owner = THIS_MODULE, }; - -int __init nft_lookup_module_init(void) -{ - return nft_register_expr(&nft_lookup_type); -} - -void nft_lookup_module_exit(void) -{ - nft_unregister_expr(&nft_lookup_type); -} diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6c1e0246706e..66c7f4b4c49b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); struct sock *sk; u32 *dest = ®s->data[priv->dreg]; @@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->pf; + *dest = nft_pf(pkt); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) @@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; @@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = 1 << NF_INET_PRE_ROUTING; + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 55bc5ab78d4a..a66b36097b8f 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, return -EOVERFLOW; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - atomic_set(&priv->counter, 0); + atomic_set(&priv->counter, priv->modulus - 1); return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b2f88617611a..98fb5d7b8087 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -148,7 +148,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_payload_type; static const struct nft_expr_ops nft_payload_ops = { .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), @@ -320,20 +319,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return &nft_payload_ops; } -static struct nft_expr_type nft_payload_type __read_mostly = { +struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, }; - -int __init nft_payload_module_init(void) -{ - return nft_register_expr(&nft_payload_type); -} - -void nft_payload_module_exit(void) -{ - nft_unregister_expr(&nft_payload_type); -} diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 393d359a1889..3e19fa1230dc 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -43,7 +43,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->pf, + priv->queues_total, nft_pf(pkt), jhash_initval); } } diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index fbc88009ca2e..009062606697 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -122,7 +122,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_range_type; static const struct nft_expr_ops nft_range_ops = { .type = &nft_range_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), @@ -131,20 +130,10 @@ static const struct nft_expr_ops nft_range_ops = { .dump = nft_range_dump, }; -static struct nft_expr_type nft_range_type __read_mostly = { +struct nft_expr_type nft_range_type __read_mostly = { .name = "range", .ops = &nft_range_ops, .policy = nft_range_policy, .maxattr = NFTA_RANGE_MAX, .owner = THIS_MODULE, }; - -int __init nft_range_module_init(void) -{ - return nft_register_expr(&nft_range_type); -} - -void nft_range_module_exit(void) -{ - nft_unregister_expr(&nft_range_type); -} diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index e79d9ca2ffee..9e90a02cb104 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->hook); + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->net, pkt->skb, pkt->hook); + nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, + priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, + nf_send_unreach6(nft_net(pkt), pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c new file mode 100644 index 000000000000..d3eb640bc784 --- /dev/null +++ b/net/netfilter/nft_rt.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/dst.h> +#include <net/ip6_route.h> +#include <net/route.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +struct nft_rt { + enum nft_rt_keys key:8; + enum nft_registers dreg:8; +}; + +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + u32 *dest = ®s->data[priv->dreg]; + const struct dst_entry *dst; + + dst = skb_dst(skb); + if (!dst) + goto err; + + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: + *dest = dst->tclassid; + break; +#endif + case NFT_RT_NEXTHOP4: + if (nft_pf(pkt) != NFPROTO_IPV4) + goto err; + + *dest = rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); + break; + case NFT_RT_NEXTHOP6: + if (nft_pf(pkt) != NFPROTO_IPV6) + goto err; + + memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + &ipv6_hdr(skb)->daddr), + sizeof(struct in6_addr)); + break; + default: + WARN_ON(1); + goto err; + } + return; + +err: + regs->verdict.code = NFT_BREAK; +} + +const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { + [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_KEY] = { .type = NLA_U32 }, +}; + +int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_rt *priv = nft_expr_priv(expr); + unsigned int len; + + if (tb[NFTA_RT_KEY] == NULL || + tb[NFTA_RT_DREG] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY])); + switch (priv->key) { +#ifdef CONFIG_IP_ROUTE_CLASSID + case NFT_RT_CLASSID: +#endif + case NFT_RT_NEXTHOP4: + len = sizeof(u32); + break; + case NFT_RT_NEXTHOP6: + len = sizeof(struct in6_addr); + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); +} + +int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_rt_type; +static const struct nft_expr_ops nft_rt_get_ops = { + .type = &nft_rt_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)), + .eval = nft_rt_get_eval, + .init = nft_rt_get_init, + .dump = nft_rt_get_dump, +}; + +static struct nft_expr_type nft_rt_type __read_mostly = { + .name = "rt", + .ops = &nft_rt_get_ops, + .policy = nft_rt_policy, + .maxattr = NFTA_RT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_rt_module_init(void) +{ + return nft_register_expr(&nft_rt_type); +} + +static void __exit nft_rt_module_exit(void) +{ + nft_unregister_expr(&nft_rt_type); +} + +module_init(nft_rt_module_init); +module_exit(nft_rt_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>"); +MODULE_ALIAS_NFT_EXPR("rt"); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fc4977456c30..ad818e52859b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -982,7 +982,7 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +/* Find table by name, grabs mutex & ref. Returns NULL on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4973cbddc446..19247a17e511 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) goto errout; audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, par->hooknum, skb->len, - par->in ? par->in->name : "?", - par->out ? par->out->name : "?"); + info->type, xt_hooknum(par), skb->len, + xt_in(par) ? xt_inname(par) : "?", + xt_out(par) ? xt_outname(par) : "?"); if (skb->mark) audit_log_format(ab, " mark=%#x", skb->mark); @@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, ntohs(eth_hdr(skb)->h_proto)); - if (par->family == NFPROTO_BRIDGE) { + if (xt_family(par) == NFPROTO_BRIDGE) { switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): audit_ip4(ab, skb); @@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) } } - switch (par->family) { + switch (xt_family(par)) { case NFPROTO_IPV4: audit_ip4(ab, skb); break; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 1763ab82bcd7..c3b2017ebe41 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -32,15 +32,15 @@ static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, - &li, "%s", loginfo->prefix); + nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", loginfo->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index b253e07cb1c5..94d0b5411192 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_addr.in6 = ipv6_hdr(skb)->daddr; else new_addr.in6 = ipv6_hdr(skb)->saddr; @@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) @@ -72,16 +72,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); + NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_POST_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT || + xt_hooknum(par) == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -96,7 +96,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg4_check(const struct xt_tgchk_param *par) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 8668a5c18dc3..c7f8958cea4a 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -25,8 +25,8 @@ static unsigned int nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; @@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; - nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 8f1779ff7e30..a360b99a958a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } @@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } } diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 03f0b370e178..651dce65a30b 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -31,7 +31,7 @@ static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par)); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) @@ -62,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 872db2d0e2a9..27241a767f17 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = par->net; + struct net *net = xt_net(par); unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); @@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, * length IPv6 header of 60, ergo the default MSS value is 1220 * Since no MSS was provided, we must use the default values */ - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) newmss = min(newmss, (u16)536); else newmss = min(newmss, (u16)1220); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 0471db4032c5..1c57ace75ae6 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); + nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } @@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); + nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 663c4c3c9072..dbd72cc40e42 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, + tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, + tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, - par->in, NFT_LOOKUP_ESTABLISHED); + xt_in(par), NFT_LOOKUP_ESTABLISHED); laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; @@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, laddr, hp->source, lport, - par->in, NFT_LOOKUP_LISTENER); + xt_in(par), NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && tproxy_sk_is_transparent(sk)) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 11d6091991a4..e329dabde35f 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; + dev = xt_in(par); else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; + dev = xt_out(par); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - if (par->family == NFPROTO_IPV6) + if (xt_family(par) == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif iph = ip_hdr(skb); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 96fa26b20b67..9a9884a39c0e 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ - if (!xt_cluster_is_multicast_addr(skb, par->family) && + if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b6dc322593a3..bb3845339efd 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, net, &tuple)) { + xt_family(par), net, &tuple)) { goto hotdrop; } - if (par->family == NFPROTO_IPV6) { + if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); @@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family, zone); + &info->mask, xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index a3b8f697cfc5..2dea15ebc55b 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, par->family) ^ + if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, par->family) ^ + if (conntrack_mt_origdst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, par->family) ^ + if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, par->family) ^ + if (conntrack_mt_repldst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index d9202cdd25c9..96ebe1cdefec 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_devgroup_info *info = par->matchinfo; if (info->flags & XT_DEVGROUP_MATCH_SRC && - (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) return false; if (info->flags & XT_DEVGROUP_MATCH_DST && - (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) return false; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 64670fc5d0e1..236ac8008909 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 71a9d95e0a81..0fdc89064488 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,9 +48,9 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; - struct netns_ipvs *ipvs = net_ipvs(par->net); + struct netns_ipvs *ipvs = net_ipvs(xt_net(par)); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ - const u_int8_t family = par->family; + const u_int8_t family = xt_family(par); struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index ac1d3c3d09e7..ec06fb1cb16f 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -42,29 +42,31 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, e = minfo->ports[++i]; pr_debug("src or dst matches with %d-%d?\n", s, e); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src >= s && src <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst >= s && dst <= e) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && ((dst >= s && dst <= e) - || (src >= s && src <= e))) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src >= s && src <= e) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst >= s && dst <= e) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return ((dst >= s && dst <= e) || + (src >= s && src <= e)) ^ minfo->invert; + default: + break; + } } else { /* exact port matching */ pr_debug("src or dst matches with %d?\n", s); - if (minfo->flags == XT_MULTIPORT_SOURCE - && src == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_DESTINATION - && dst == s) - return true ^ minfo->invert; - if (minfo->flags == XT_MULTIPORT_EITHER - && (src == s || dst == s)) - return true ^ minfo->invert; + switch (minfo->flags) { + case XT_MULTIPORT_SOURCE: + return (src == s) ^ minfo->invert; + case XT_MULTIPORT_DESTINATION: + return (dst == s) ^ minfo->invert; + case XT_MULTIPORT_EITHER: + return (src == s || dst == s) ^ minfo->invert; + default: + break; + } } } diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index cf327593852a..cc0518fe598e 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); + overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? false : true; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..c05fefcec238 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = p->net; + struct net *net = xt_net(p); if (!info) return false; @@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, + xt_in(p), xt_out(p), NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(net, p->family, p->hooknum, skb, p->in, - p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), + xt_out(p), NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index a20e731b5b6c..16477df45b3b 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); - struct net *net = par->net; + struct net *net = xt_net(par); if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 5b645cb598fc..57efb703ff18 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (par->family == NFPROTO_IPV4 && + else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (par->family == NFPROTO_IPV6 && + else if (xt_family(par) == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f23e97bb42d7..2b4ab189bba7 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, par->family); + ret = match_policy_in(skb, info, xt_family(par)); else - ret = match_policy_out(skb, info, par->family); + ret = match_policy_out(skb, info, xt_family(par)); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index e3b7a09b103e..bf250000e084 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; @@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int8_t ttl; bool ret = info->invert; - if (par->family == NFPROTO_IPV4) { + if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) @@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* use TTL as seen before forwarding */ - if (par->out != NULL && skb->sk == NULL) + if (xt_out(par) != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); @@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) nf_inet_addr_mask(&addr, &addr_mask, &t->mask); - e = recent_entry_lookup(t, &addr_mask, par->family, + e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, &addr_mask, par->family, ttl); + e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5669e5b453f4..64285702afd5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); return match_set(info->match_set.index, skb, par, &opt, @@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, 0, UINT_MAX); if (opt.flags & IPSET_RETURN_NOMATCH) @@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v3 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v4 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -423,17 +423,19 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Revision 3 target */ +#define MOPT(opt, member) ((opt).ext.skbinfo.member) + static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; int ret; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); - ADT_OPT(map_opt, par->family, info->map_set.dim, + ADT_OPT(map_opt, xt_family(par), info->map_set.dim, info->map_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) if (!ret) return XT_CONTINUE; if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) - skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) - ^ (map_opt.ext.skbmark); + skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask)) + ^ MOPT(map_opt, skbmark); if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) - skb->priority = map_opt.ext.skbprio; + skb->priority = MOPT(map_opt, skbprio); if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && skb->dev && - skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) - skb_set_queue_mapping(skb, map_opt.ext.skbqueue); + skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue)) + skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue)); } return XT_CONTINUE; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..2198914707f5 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,76 +22,14 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -#define XT_SOCKET_HAVE_IPV6 1 #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/inet6_hashtables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif +#include <net/netfilter/nf_socket.h> #include <linux/netfilter/xt_socket.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#define XT_SOCKET_HAVE_CONNTRACK 1 -#include <net/netfilter/nf_conntrack.h> -#endif - -static int -extract_icmp4_fields(const struct sk_buff *skb, - u8 *protocol, - __be32 *raddr, - __be32 *laddr, - __be16 *rport, - __be16 *lport) -{ - unsigned int outside_hdrlen = ip_hdrlen(skb); - struct iphdr *inside_iph, _inside_iph; - struct icmphdr *icmph, _icmph; - __be16 *ports, _ports[2]; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - switch (icmph->type) { - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_REDIRECT: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return 1; - } - - inside_iph = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr), - sizeof(_inside_iph), &_inside_iph); - if (inside_iph == NULL) - return 1; - - if (inside_iph->protocol != IPPROTO_TCP && - inside_iph->protocol != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr) + - (inside_iph->ihl << 2), - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_iph->protocol; - *laddr = inside_iph->saddr; - *lport = ports[0]; - *raddr = inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - /* "socket" match based redirection (no specific rule) * =================================================== * @@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb, * then local services could intercept traffic going through the * box. */ -static struct sock * -xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - return NULL; -} - -static bool xt_socket_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - - default: - return inet_sk(sk)->transparent; - } -} - -static struct sock *xt_socket_lookup_slow_v4(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - const struct iphdr *iph = ip_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); - u8 uninitialized_var(protocol); -#ifdef XT_SOCKET_HAVE_CONNTRACK - struct nf_conn const *ct; - enum ip_conntrack_info ctinfo; -#endif - - if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - protocol = iph->protocol; - saddr = iph->saddr; - sport = hp->source; - daddr = iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = iph->protocol == IPPROTO_TCP ? - ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : - ip_hdrlen(skb) + sizeof(*hp); - - } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return NULL; - } else { - return NULL; - } - -#ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in - * case this is a reply packet of an established - * SNAT-ted connection. - */ - ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && - ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_ESTABLISHED_REPLY) || - (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_RELATED_REPLY)) && - (ct->status & IPS_SRC_NAT_DONE)) { - - daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; - dport = (iph->protocol == IPPROTO_TCP) ? - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - } -#endif - - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); -} - static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) return socket_match(skb, par, par->matchinfo); } -#ifdef XT_SOCKET_HAVE_IPV6 - -static int -extract_icmp6_fields(const struct sk_buff *skb, - unsigned int outside_hdrlen, - int *protocol, - const struct in6_addr **raddr, - const struct in6_addr **laddr, - __be16 *rport, - __be16 *lport, - struct ipv6hdr *ipv6_var) -{ - const struct ipv6hdr *inside_iph; - struct icmp6hdr *icmph, _icmph; - __be16 *ports, _ports[2]; - u8 inside_nexthdr; - __be16 inside_fragoff; - int inside_hdrlen; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) - return 1; - - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), - sizeof(*ipv6_var), ipv6_var); - if (inside_iph == NULL) - return 1; - inside_nexthdr = inside_iph->nexthdr; - - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + - sizeof(*ipv6_var), - &inside_nexthdr, &inside_fragoff); - if (inside_hdrlen < 0) - return 1; /* hjm: Packet has no/incomplete transport layer headers. */ - - if (inside_nexthdr != IPPROTO_TCP && - inside_nexthdr != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, inside_hdrlen, - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_nexthdr; - *laddr = &inside_iph->saddr; - *lport = ports[0]; - *raddr = &inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - -static struct sock * -xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - - return NULL; -} - -static struct sock *xt_socket_lookup_slow_v6(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - __be16 uninitialized_var(dport), uninitialized_var(sport); - const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - int thoff = 0, tproto; - - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { - pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NULL; - } - - if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - saddr = &iph->saddr; - sport = hp->source; - daddr = &iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = tproto == IPPROTO_TCP ? - thoff + __tcp_hdrlen((struct tcphdr *)hp) : - thoff + sizeof(*hp); - - } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport, &ipv6_var)) - return NULL; - } else { - return NULL; - } - - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); -} - +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { @@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, @@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, @@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, @@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_defrag_ipv6_enable(); #endif diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 2ec93c5e77bb..d177dd066504 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CALIPSO family */ -static struct genl_family netlbl_calipso_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_CALIPSO_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CALIPSO_A_MAX, -}; +static struct genl_family netlbl_calipso_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { @@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = { }, }; +static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CALIPSO_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CALIPSO_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_calipso_ops, + .n_ops = ARRAY_SIZE(netlbl_calipso_ops), +}; + /* NetLabel Generic NETLINK Protocol Functions */ @@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = { */ int __init netlbl_calipso_genl_init(void) { - return genl_register_family_with_ops(&netlbl_calipso_gnl_family, - netlbl_calipso_ops); + return genl_register_family(&netlbl_calipso_gnl_family); } static const struct netlbl_calipso_ops *calipso_ops; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 7fd1104ba900..4149d3e63589 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_cipsov4_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_CIPSOV4_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CIPSOV4_A_MAX, -}; - +static struct genl_family netlbl_cipsov4_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, @@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { }, }; +static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CIPSOV4_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_cipsov4_ops, + .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { */ int __init netlbl_cipsov4_genl_init(void) { - return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family, - netlbl_cipsov4_ops); + return genl_register_family(&netlbl_cipsov4_gnl_family); } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index f85d0e07af2d..21e0095b1d14 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_mgmt_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_MGMT_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_MGMT_A_MAX, -}; +static struct genl_family netlbl_mgmt_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { @@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { }, }; +static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_MGMT_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_mgmt_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { */ int __init netlbl_mgmt_genl_init(void) { - return genl_register_family_with_ops(&netlbl_mgmt_gnl_family, - netlbl_mgmt_genl_ops); + return genl_register_family(&netlbl_mgmt_gnl_family); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4528cff9138b..22dc1b9d6362 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ -static struct genl_family netlbl_unlabel_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_UNLABELED_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_UNLABEL_A_MAX, -}; +static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { @@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { }, }; +static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_UNLABEL_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_unlabel_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { */ int __init netlbl_unlabel_genl_init(void) { - return genl_register_family_with_ops(&netlbl_unlabel_gnl_family, - netlbl_unlabel_genl_ops); + return genl_register_family(&netlbl_unlabel_gnl_family); } /* diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 49c28e8ef01b..fb6e10fdb217 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <linux/bitmap.h> #include <linux/rwsem.h> +#include <linux/idr.h> #include <net/sock.h> #include <net/genetlink.h> @@ -58,10 +59,8 @@ static void genl_unlock_all(void) up_write(&cb_lock); } -#define GENL_FAM_TAB_SIZE 16 -#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) +static DEFINE_IDR(genl_fam_idr); -static struct list_head family_ht[GENL_FAM_TAB_SIZE]; /* * Bitmap of multicast groups that are currently in use. * @@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id); -static inline unsigned int genl_family_hash(unsigned int id) +static const struct genl_family *genl_family_find_byid(unsigned int id) { - return id & GENL_FAM_TAB_MASK; + return idr_find(&genl_fam_idr, id); } -static inline struct list_head *genl_family_chain(unsigned int id) +static const struct genl_family *genl_family_find_byname(char *name) { - return &family_ht[genl_family_hash(id)]; -} - -static struct genl_family *genl_family_find_byid(unsigned int id) -{ - struct genl_family *f; - - list_for_each_entry(f, genl_family_chain(id), family_list) - if (f->id == id) - return f; - - return NULL; -} - -static struct genl_family *genl_family_find_byname(char *name) -{ - struct genl_family *f; - int i; + const struct genl_family *family; + unsigned int id; - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - list_for_each_entry(f, genl_family_chain(i), family_list) - if (strcmp(f->name, name) == 0) - return f; + idr_for_each_entry(&genl_fam_idr, family, id) + if (strcmp(family->name, name) == 0) + return family; return NULL; } -static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +static const struct genl_ops *genl_get_cmd(u8 cmd, + const struct genl_family *family) { int i; @@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) return NULL; } -/* Of course we are going to have problems once we hit - * 2^16 alive types, but that can only happen by year 2K -*/ -static u16 genl_generate_id(void) -{ - static u16 id_gen_idx = GENL_MIN_ID; - int i; - - for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { - if (id_gen_idx != GENL_ID_VFS_DQUOT && - id_gen_idx != GENL_ID_PMCRAID && - !genl_family_find_byid(id_gen_idx)) - return id_gen_idx; - if (++id_gen_idx > GENL_MAX_ID) - id_gen_idx = GENL_MIN_ID; - } - - return 0; -} - static int genl_allocate_reserve_groups(int n_groups, int *first_id) { unsigned long *new_groups; @@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) return err; } -static void genl_unregister_mc_groups(struct genl_family *family) +static void genl_unregister_mc_groups(const struct genl_family *family) { struct net *net; int i; @@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family) } /** - * __genl_register_family - register a generic netlink family + * genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. * - * The family's ops array must already be assigned, you can use the - * genl_register_family_with_ops() helper function. + * The family's ops, multicast groups and module pointer must already + * be assigned. * * Return 0 on success or a negative error code. */ -int __genl_register_family(struct genl_family *family) +int genl_register_family(struct genl_family *family) { - int err = -EINVAL, i; - - if (family->id && family->id < GENL_MIN_ID) - goto errout; - - if (family->id > GENL_MAX_ID) - goto errout; + int err, i; + int start = GENL_START_ALLOC, end = GENL_MAX_ID; err = genl_validate_ops(family); if (err) @@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->id == GENL_ID_GENERATE) { - u16 newid = genl_generate_id(); - - if (!newid) { - err = -ENOMEM; - goto errout_locked; - } - - family->id = newid; - } else if (genl_family_find_byid(family->id)) { - err = -EEXIST; - goto errout_locked; + /* + * Sadly, a few cases need to be special-cased + * due to them having previously abused the API + * and having used their family ID also as their + * multicast group ID, so we use reserved IDs + * for both to be sure we can do that mapping. + */ + if (family == &genl_ctrl) { + /* and this needs to be special for initial family lookups */ + start = end = GENL_ID_CTRL; + } else if (strcmp(family->name, "pmcraid") == 0) { + start = end = GENL_ID_PMCRAID; + } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + start = end = GENL_ID_VFS_DQUOT; } if (family->maxattr && !family->parallel_ops) { @@ -402,11 +360,17 @@ int __genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; + family->id = idr_alloc(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); + if (family->id < 0) { + err = family->id; + goto errout_locked; + } + err = genl_validate_assign_mc_groups(family); if (err) - goto errout_free; + goto errout_remove; - list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); /* send all events */ @@ -417,14 +381,14 @@ int __genl_register_family(struct genl_family *family) return 0; -errout_free: +errout_remove: + idr_remove(&genl_fam_idr, family->id); kfree(family->attrbuf); errout_locked: genl_unlock_all(); -errout: return err; } -EXPORT_SYMBOL(__genl_register_family); +EXPORT_SYMBOL(genl_register_family); /** * genl_unregister_family - unregister generic netlink family @@ -434,33 +398,29 @@ EXPORT_SYMBOL(__genl_register_family); * * Returns 0 on success or a negative error code. */ -int genl_unregister_family(struct genl_family *family) +int genl_unregister_family(const struct genl_family *family) { - struct genl_family *rc; - genl_lock_all(); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { - if (family->id != rc->id || strcmp(rc->name, family->name)) - continue; + if (!genl_family_find_byid(family->id)) { + genl_unlock_all(); + return -ENOENT; + } - genl_unregister_mc_groups(family); + genl_unregister_mc_groups(family); - list_del(&rc->family_list); - family->n_ops = 0; - up_write(&cb_lock); - wait_event(genl_sk_destructing_waitq, - atomic_read(&genl_sk_destructing_cnt) == 0); - genl_unlock(); + idr_remove(&genl_fam_idr, family->id); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); - return 0; - } + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); - genl_unlock_all(); + kfree(family->attrbuf); + + genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); - return -ENOENT; + return 0; } EXPORT_SYMBOL(genl_unregister_family); @@ -476,7 +436,7 @@ EXPORT_SYMBOL(genl_unregister_family); * Returns pointer to user specific header */ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd) + const struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; @@ -535,7 +495,7 @@ static int genl_lock_done(struct netlink_callback *cb) return rc; } -static int genl_family_rcv_msg(struct genl_family *family, +static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -647,7 +607,7 @@ out: static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct genl_family *family; + const struct genl_family *family; int err; family = genl_family_find_byid(nlh->nlmsg_type); @@ -676,15 +636,9 @@ static void genl_rcv(struct sk_buff *skb) * Controller **************************************************************************/ -static struct genl_family genl_ctrl = { - .id = GENL_ID_CTRL, - .name = "nlctrl", - .version = 0x2, - .maxattr = CTRL_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family genl_ctrl; -static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, +static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; @@ -771,7 +725,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_family *family, +static int ctrl_fill_mcgrp_info(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) @@ -814,37 +768,30 @@ nla_put_failure: static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) { - - int i, n = 0; + int n = 0; struct genl_family *rt; struct net *net = sock_net(skb->sk); - int chains_to_skip = cb->args[0]; - int fams_to_skip = cb->args[1]; - - for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { - n = 0; - list_for_each_entry(rt, genl_family_chain(i), family_list) { - if (!rt->netnsok && !net_eq(net, &init_net)) - continue; - if (++n < fams_to_skip) - continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) - goto errout; - } + int fams_to_skip = cb->args[0]; + unsigned int id; - fams_to_skip = 0; - } + idr_for_each_entry(&genl_fam_idr, rt, id) { + if (!rt->netnsok && !net_eq(net, &init_net)) + continue; + + if (n++ < fams_to_skip) + continue; -errout: - cb->args[0] = i; - cb->args[1] = n; + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY) < 0) + break; + } + cb->args[0] = n; return skb->len; } -static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, +static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; @@ -864,7 +811,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff * -ctrl_build_mcgrp_msg(struct genl_family *family, +ctrl_build_mcgrp_msg(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, int seq, u8 cmd) { @@ -894,7 +841,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct genl_family *res = NULL; + const struct genl_family *res = NULL; int err = -EINVAL; if (info->attrs[CTRL_ATTR_FAMILY_ID]) { @@ -938,7 +885,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id) { @@ -992,27 +939,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static struct genl_family genl_ctrl __ro_after_init = { + .module = THIS_MODULE, + .ops = genl_ctrl_ops, + .n_ops = ARRAY_SIZE(genl_ctrl_ops), + .mcgrps = genl_ctrl_groups, + .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, + .netnsok = true, +}; + static int genl_bind(struct net *net, int group) { - int i, err = -ENOENT; + struct genl_family *f; + int err = -ENOENT; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } + + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; } } up_read(&cb_lock); @@ -1022,21 +981,19 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { - int i; + struct genl_family *f; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; } } up_read(&cb_lock); @@ -1076,13 +1033,9 @@ static struct pernet_operations genl_pernet_ops = { static int __init genl_init(void) { - int i, err; - - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - INIT_LIST_HEAD(&family_ht[i]); + int err; - err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, - genl_ctrl_groups); + err = genl_register_family(&genl_ctrl); if (err < 0) goto problem; @@ -1098,6 +1051,25 @@ problem: subsys_initcall(genl_init); +/** + * genl_family_attrbuf - return family's attrbuf + * @family: the family + * + * Return the family's attrbuf, while validating that it's + * actually valid to access it. + * + * You cannot use this function with a family that has parallel_ops + * and you can only use it within (pre/post) doit/dumpit callbacks. + */ +struct nlattr **genl_family_attrbuf(const struct genl_family *family) +{ + if (!WARN_ON(family->parallel_ops)) + lockdep_assert_held(&genl_mutex); + + return family->attrbuf; +} +EXPORT_SYMBOL(genl_family_attrbuf); + static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { @@ -1127,8 +1099,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, return err; } -int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, - u32 portid, unsigned int group, gfp_t flags) +int genlmsg_multicast_allns(const struct genl_family *family, + struct sk_buff *skb, u32 portid, + unsigned int group, gfp_t flags) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -1137,7 +1110,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags) { struct net *net = genl_info_net(info); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea023b35f1c2..03f3d5c7beb8 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; -static struct genl_family nfc_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NFC_GENL_NAME, - .version = NFC_GENL_VERSION, - .maxattr = NFC_ATTR_MAX, -}; - +static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, @@ -120,21 +113,20 @@ nla_put_failure: static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family); struct nfc_dev *dev; int rc; u32 idx; rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - nfc_genl_family.attrbuf, - nfc_genl_family.maxattr, - nfc_genl_policy); + attrbuf, nfc_genl_family.maxattr, nfc_genl_policy); if (rc < 0) return ERR_PTR(rc); - if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]) + if (!attrbuf[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); - idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]); + idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) @@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = { }, }; +static struct genl_family nfc_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NFC_GENL_NAME, + .version = NFC_GENL_VERSION, + .maxattr = NFC_ATTR_MAX, + .module = THIS_MODULE, + .ops = nfc_genl_ops, + .n_ops = ARRAY_SIZE(nfc_genl_ops), + .mcgrps = nfc_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), +}; + struct urelease_work { struct work_struct w; @@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void) { int rc; - rc = genl_register_family_with_ops_groups(&nfc_genl_family, - nfc_genl_ops, - nfc_genl_mcgrps); + rc = genl_register_family(&nfc_genl_family); if (rc) return rc; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4e03f64709bc..514f7bcf7c63 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -62,9 +62,11 @@ struct ovs_frag_data { struct vport *vport; struct ovs_skb_cb cb; __be16 inner_protocol; - __u16 vlan_tci; + u16 network_offset; /* valid only for MPLS */ + u16 vlan_tci; __be16 vlan_proto; unsigned int l2_len; + u8 mac_proto; u8 l2_data[MAX_L2_LEN]; }; @@ -136,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, static void invalidate_flow_key(struct sw_flow_key *key) { - key->eth.type = htons(0); + key->mac_proto |= SW_FLOW_KEY_INVALID; } static bool is_flow_key_valid(const struct sw_flow_key *key) { - return !!key->eth.type; + return !(key->mac_proto & SW_FLOW_KEY_INVALID); } static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, @@ -185,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); - update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) + update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -195,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, const __be16 ethertype) { - struct ethhdr *hdr; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -211,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* mpls_hdr() is used to locate the ethertype field correctly in the - * presence of VLAN tags. - */ - hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); - update_ethertype(skb, hdr, ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) { + struct ethhdr *hdr; + + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. + */ + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); + update_ethertype(skb, hdr, ethertype); + } if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -311,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +/* pop_eth does not support VLAN packets as this action is never called + * for them. + */ +static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key) +{ + skb_pull_rcsum(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_action_push_eth *ethh) +{ + struct ethhdr *hdr; + + /* Add the new Ethernet header */ + if (skb_cow_head(skb, ETH_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + hdr = eth_hdr(skb); + ether_addr_copy(hdr->h_source, ethh->addresses.eth_src); + ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst); + hdr->h_proto = skb->protocol; + + skb_postpush_rcsum(skb, hdr, ETH_HLEN); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_ETHERNET; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -666,7 +713,13 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); - ovs_vport_send(vport, skb); + if (eth_p_mpls(skb->protocol)) { + skb->inner_network_header = skb->network_header; + skb_set_network_header(skb, data->network_offset); + skb_reset_mac_len(skb); + } + + ovs_vport_send(vport, skb, data->mac_proto); return 0; } @@ -684,7 +737,8 @@ static struct dst_ops ovs_dst_ops = { /* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is * ovs_vport_output(), which is called once per fragmented packet. */ -static void prepare_frag(struct vport *vport, struct sk_buff *skb) +static void prepare_frag(struct vport *vport, struct sk_buff *skb, + u16 orig_network_offset, u8 mac_proto) { unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; @@ -694,8 +748,10 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) data->vport = vport; data->cb = *OVS_CB(skb); data->inner_protocol = skb->inner_protocol; + data->network_offset = orig_network_offset; data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; + data->mac_proto = mac_proto; data->l2_len = hlen; memcpy(&data->l2_data, skb->data, hlen); @@ -704,18 +760,27 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) } static void ovs_fragment(struct net *net, struct vport *vport, - struct sk_buff *skb, u16 mru, __be16 ethertype) + struct sk_buff *skb, u16 mru, + struct sw_flow_key *key) { + u16 orig_network_offset = 0; + + if (eth_p_mpls(skb->protocol)) { + orig_network_offset = skb_network_offset(skb); + skb->network_header = skb->inner_network_header; + } + if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); goto err; } - if (ethertype == htons(ETH_P_IP)) { + if (key->eth.type == htons(ETH_P_IP)) { struct dst_entry ovs_dst; unsigned long orig_dst; - prepare_frag(vport, skb); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_dst.dev = vport->dev; @@ -726,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, ip_do_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); - } else if (ethertype == htons(ETH_P_IPV6)) { + } else if (key->eth.type == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); unsigned long orig_dst; struct rt6_info ovs_rt; @@ -735,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - prepare_frag(vport, skb); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); @@ -749,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", - ovs_vport_name(vport), ntohs(ethertype), mru, + ovs_vport_name(vport), ntohs(key->eth.type), mru, vport->dev->mtu); goto err; } @@ -769,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, u32 cutlen = OVS_CB(skb)->cutlen; if (unlikely(cutlen > 0)) { - if (skb->len - cutlen > ETH_HLEN) + if (skb->len - cutlen > ovs_mac_header_len(key)) pskb_trim(skb, skb->len - cutlen); else - pskb_trim(skb, ETH_HLEN); + pskb_trim(skb, ovs_mac_header_len(key)); } - if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { - ovs_vport_send(vport, skb); + if (likely(!mru || + (skb->len <= mru + vport->dev->hard_header_len))) { + ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); } else if (mru <= vport->dev->mtu) { struct net *net = read_pnet(&dp->net); - __be16 ethertype = key->eth.type; - - if (!is_flow_key_valid(key)) { - if (eth_p_mpls(skb->protocol)) - ethertype = skb->inner_protocol; - else - ethertype = vlan_get_protocol(skb); - } - ovs_fragment(net, vport, skb, mru, ethertype); + ovs_fragment(net, vport, skb, mru, key); } else { kfree_skb(skb); } @@ -1182,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, if (err) return err == -EINPROGRESS ? 0 : err; break; + + case OVS_ACTION_ATTR_PUSH_ETH: + err = push_eth(skb, key, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_ETH: + err = pop_eth(skb, key); + break; } if (unlikely(err)) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 31045ef44a82..9b8a028b7dad 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -725,12 +725,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, skb->nfctinfo = IP_CT_NEW; } - /* Repeat if requested, see nf_iterate(). */ - do { - err = nf_conntrack_in(net, info->family, - NF_INET_PRE_ROUTING, skb); - } while (err == NF_REPEAT); - + err = nf_conntrack_in(net, info->family, + NF_INET_PRE_ROUTING, skb); if (err != NF_ACCEPT) return -ENOENT; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d67ea856067..1402f1be642d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -59,7 +59,6 @@ #include "vport-netdev.h" int ovs_net_id __read_mostly; -EXPORT_SYMBOL_GPL(ovs_net_id); static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; @@ -131,7 +130,6 @@ int lockdep_ovsl_is_held(void) else return 1; } -EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held); #endif static struct vport *new_vport(const struct vport_parms *); @@ -562,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct datapath *dp; - struct ethhdr *eth; struct vport *input_vport; u16 mru = 0; int len; @@ -583,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); - skb_reset_mac_header(packet); - eth = eth_hdr(packet); - - /* Normally, setting the skb 'protocol' field would be handled by a - * call to eth_type_trans(), but it assumes there's a sending - * device, which we may not have. */ - if (eth_proto_is_802_3(eth->h_proto)) - packet->protocol = eth->h_proto; - else - packet->protocol = htons(ETH_P_802_2); - /* Set packet's mru */ if (a[OVS_PACKET_ATTR_MRU]) { mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); @@ -620,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; + packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); @@ -672,8 +659,7 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_packet_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, @@ -682,6 +668,7 @@ static struct genl_family dp_packet_genl_family = { .parallel_ops = true, .ops = dp_packet_genl_ops, .n_ops = ARRAY_SIZE(dp_packet_genl_ops), + .module = THIS_MODULE, }; static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, @@ -1437,8 +1424,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_flow_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, @@ -1449,6 +1435,7 @@ static struct genl_family dp_flow_genl_family = { .n_ops = ARRAY_SIZE(dp_flow_genl_ops), .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1823,8 +1810,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static struct genl_family dp_datapath_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_datapath_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, @@ -1835,6 +1821,7 @@ static struct genl_family dp_datapath_genl_family = { .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; /* Called with ovs_mutex or RCU read lock. */ @@ -2245,8 +2232,7 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family dp_vport_genl_family = { - .id = GENL_ID_GENERATE, +struct genl_family dp_vport_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, @@ -2257,6 +2243,7 @@ struct genl_family dp_vport_genl_family = { .n_ops = ARRAY_SIZE(dp_vport_genl_ops), .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static struct genl_family * const dp_genl_families[] = { @@ -2274,7 +2261,7 @@ static void dp_unregister_genl(int n_families) genl_unregister_family(dp_genl_families[i]); } -static int dp_register_genl(void) +static int __init dp_register_genl(void) { int err; int i; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 22087062bd10..08aa926cd5cf 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) return 1; } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +static void clear_vlan(struct sw_flow_key *key) { - int res; - key->eth.vlan.tci = 0; key->eth.vlan.tpid = 0; key->eth.cvlan.tci = 0; key->eth.cvlan.tpid = 0; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; if (skb_vlan_tag_present(skb)) { key->eth.vlan.tci = htons(skb->vlan_tci); @@ -483,17 +486,20 @@ invalid: * * Returns 0 if successful, otherwise a negative errno value. * - * Initializes @skb header pointers as follows: + * Initializes @skb header fields as follows: * - * - skb->mac_header: the Ethernet header. + * - skb->mac_header: the L2 header. * - * - skb->network_header: just past the Ethernet header, or just past the - * VLAN header, to the first byte of the Ethernet payload. + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. * * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. */ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { @@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) skb_reset_mac_header(skb); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. - */ - eth = eth_hdr(skb); - ether_addr_copy(key->eth.src, eth->h_source); - ether_addr_copy(key->eth.dst, eth->h_dest); + /* Link layer. */ + clear_vlan(key); + if (key->mac_proto == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ + skb_reset_network_header(skb); + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + skb->protocol = parse_ethertype(skb); + if (unlikely(skb->protocol == htons(0))) + return -ENOMEM; - skb_reset_network_header(skb); + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + } skb_reset_mac_len(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); + key->eth.type = skb->protocol; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { @@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } +static int key_extract_mac_proto(struct sk_buff *skb) +{ + switch (skb->dev->type) { + case ARPHRD_ETHER: + return MAC_PROTO_ETHERNET; + case ARPHRD_NONE: + if (skb->protocol == htons(ETH_P_TEB)) + return MAC_PROTO_ETHERNET; + return MAC_PROTO_NONE; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { + int res; + /* Extract metadata from packet. */ if (tun_info) { key->tun_proto = ip_tunnel_info_af(tun_info); @@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.skb_mark = skb->mark; ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; + res = key_extract_mac_proto(skb); + if (res < 0) + return res; + key->mac_proto = res; key->recirc_id = 0; return key_extract(skb, key); @@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets recieved from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + */ + skb->protocol = key->eth.type; + } else { + struct ethhdr *eth; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + /* Normally, setting the skb 'protocol' field would be + * handled by a call to eth_type_trans(), but it assumes + * there's a sending device, which we may not have. + */ + if (eth_proto_is_802_3(eth->h_proto)) + skb->protocol = eth->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + } + return key_extract(skb, key); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ae783f5c6695..f61cae7f9030 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,6 +37,12 @@ struct sk_buff; +enum sw_flow_mac_proto { + MAC_PROTO_NONE = 0, + MAC_PROTO_ETHERNET, +}; +#define SW_FLOW_KEY_INVALID 0x80 + /* Store options at the end of the array if they are less than the * maximum size. This allows us to get the benefits of variable length * matching for small options. @@ -68,6 +74,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */ u8 tun_proto; /* Protocol of encapsulating tunnel. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ @@ -206,6 +213,21 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key) +{ + return key->mac_proto & ~SW_FLOW_KEY_INVALID; +} + +static inline u16 __ovs_mac_header_len(u8 mac_proto) +{ + return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0; +} + +static inline u16 ovs_mac_header_len(const struct sw_flow_key *key) +{ + return __ovs_mac_header_len(ovs_key_mac_proto(key)); +} + static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) { return sfid->ufid_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ae25ded82b3b..d19044f2b1f4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match, static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; + u64 key_expected = 0; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they @@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match, return 0; } +static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, + u64 *attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (!eth_proto_is_802_3(eth_type)) { + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) { + u8 mac_proto = MAC_PROTO_ETHERNET; + if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } + + /* For layer 3 packets the Ethernet type is provided + * and treated as metadata but no MAC addresses are provided. + */ + if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && + (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) + mac_proto = MAC_PROTO_NONE; + + /* Always exact match mac_proto */ + SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask); + + if (mac_proto == MAC_PROTO_NONE) + return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, + log); + return 0; } @@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY(match, eth.dst, eth_key->eth_dst, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - } - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - /* VLAN attribute is always parsed before getting here since it - * may occur multiple times. - */ - OVS_NLERR(log, "VLAN attribute unexpected."); - return -EINVAL; - } - - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (!eth_proto_is_802_3(eth_type)) { - OVS_NLERR(log, "EtherType %x is less than min %x", - ntohs(eth_type), ETH_P_802_3_MIN); + if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); return -EINVAL; } - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, + log); + if (err) + return err; + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + } else if (!match->key->eth.type) { + OVS_NLERR(log, "Either Ethernet header or EtherType is required."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { @@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (ovs_ct_put_key(output, skb)) goto nla_put_failure; - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - ether_addr_copy(eth_key->eth_src, output->eth.src); - ether_addr_copy(eth_key->eth_dst, output->eth.dst); - - if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) + if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.vlan.tci) - goto unencap; - if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + eth_key = nla_data(nla); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); + + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; - in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.cvlan.tci) + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.vlan.tci) goto unencap; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } } - } - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. Ethertype can also - * be wildcarded. - */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. + */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } } if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) @@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len) static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *skip_copy, __be16 eth_type, bool masked, bool log) + struct sw_flow_actions **sfa, bool *skip_copy, + u8 mac_proto, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -2157,9 +2190,12 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: - case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_ETHERNET: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + case OVS_KEY_ATTR_TUNNEL: if (masked) return -EINVAL; /* Masked tunnel set not supported. */ @@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { + u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; @@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), [OVS_ACTION_ATTR_CT] = (u32)-1, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), + [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), + [OVS_ACTION_ATTR_POP_ETH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan = nla_data(a); if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; @@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, false, log); + &skip_copy, mac_proto, eth_type, + false, log); if (err) return err; break; case OVS_ACTION_ATTR_SET_MASKED: err = validate_set(a, key, sfa, - &skip_copy, eth_type, true, log); + &skip_copy, mac_proto, eth_type, + true, log); if (err) return err; break; @@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_PUSH_ETH: + /* Disallow pushing an Ethernet header if one + * is already present */ + if (mac_proto != MAC_PROTO_NONE) + return -EINVAL; + mac_proto = MAC_PROTO_NONE; + break; + + case OVS_ACTION_ATTR_POP_ETH: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + if (vlan_tci & htons(VLAN_TAG_PRESENT)) + return -EINVAL; + mac_proto = MAC_PROTO_ETHERNET; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index e7da29021b38..d5d6caecd072 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = { .get_link = ethtool_op_get_link, }; -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu) -{ - if (new_mtu < 68) - return -EINVAL; - - netdev->mtu = new_mtu; - return 0; -} - static void internal_dev_destructor(struct net_device *dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); @@ -148,7 +139,6 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = internal_dev_change_mtu, .ndo_get_stats64 = internal_get_stats, .ndo_set_rx_headroom = internal_set_rx_headroom, }; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4e3972344aa6..0389398fa4ab 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb) if (unlikely(!skb)) return; - skb_push(skb, ETH_HLEN); - skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + if (skb->dev->type == ARPHRD_ETHER) { + skb_push(skb, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + } ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: @@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) } if (vport->dev->flags & IFF_LOOPBACK || - vport->dev->type != ARPHRD_ETHER || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || ovs_is_internal_dev(vport->dev)) { err = -EINVAL; goto error_put; @@ -162,7 +165,6 @@ void ovs_netdev_detach_dev(struct vport *vport) netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); } -EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev); static void netdev_destroy(struct vport *vport) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 7387418ac514..b6c8524032a0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -463,27 +463,11 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, ovs_dp_process_packet(skb, &key); return 0; } -EXPORT_SYMBOL_GPL(ovs_vport_receive); -static void free_vport_rcu(struct rcu_head *rcu) +static unsigned int packet_length(const struct sk_buff *skb, + struct net_device *dev) { - struct vport *vport = container_of(rcu, struct vport, rcu); - - ovs_vport_free(vport); -} - -void ovs_vport_deferred_free(struct vport *vport) -{ - if (!vport) - return; - - call_rcu(&vport->rcu, free_vport_rcu); -} -EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); - -static unsigned int packet_length(const struct sk_buff *skb) -{ - unsigned int length = skb->len - ETH_HLEN; + unsigned int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -497,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb) return length; } -void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) { int mtu = vport->dev->mtu; - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + switch (vport->dev->type) { + case ARPHRD_NONE: + if (mac_proto == MAC_PROTO_ETHERNET) { + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = htons(ETH_P_TEB); + } else if (mac_proto != MAC_PROTO_NONE) { + WARN_ON_ONCE(1); + goto drop; + } + break; + case ARPHRD_ETHER: + if (mac_proto != MAC_PROTO_ETHERNET) + goto drop; + break; + default: + goto drop; + } + + if (unlikely(packet_length(skb, vport->dev) > mtu && + !skb_is_gso(skb))) { net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", vport->dev->name, - packet_length(skb), mtu); + packet_length(skb, vport->dev), mtu); vport->dev->stats.tx_errors++; goto drop; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f01f28a567ad..cda66c26ad08 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -149,7 +149,6 @@ struct vport_ops { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); -void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 @@ -198,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops); }) void ovs_vport_ops_unregister(struct vport_ops *ops); -void ovs_vport_send(struct vport *vport, struct sk_buff *skb); +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto); #endif /* vport.h */ diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index fa8237fdc57b..21c28b51be94 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int gprs_set_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops gprs_netdev_ops = { .ndo_open = gprs_open, .ndo_stop = gprs_close, .ndo_start_xmit = gprs_xmit, - .ndo_change_mtu = gprs_set_mtu, }; static void gprs_setup(struct net_device *dev) @@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev) dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; + dev->min_mtu = 576; + dev->max_mtu = (PHONET_MAX_MTU - 11); dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 850a86cde0b3..8bad5624a27a 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1167,7 +1167,7 @@ disabled: /* Wait until flow control allows TX */ done = atomic_read(&pn->tx_credits); while (!done) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (!timeo) { err = -EAGAIN; @@ -1178,10 +1178,9 @@ disabled: goto out; } - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); - finish_wait(sk_sleep(sk), &wait); + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (sk->sk_state != TCP_ESTABLISHED) goto disabled; diff --git a/net/rds/connection.c b/net/rds/connection.c index f5058559bb08..13f459dad4ef 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -689,21 +689,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn) } EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); -/* - * An error occurred on the connection - */ -void -__rds_conn_error(struct rds_connection *conn, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintk(fmt, ap); - va_end(ap); - - rds_conn_drop(conn); -} - void __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 67ba67c058b1..4121e1862444 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -683,10 +683,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -__printf(2, 3) -void __rds_conn_error(struct rds_connection *conn, const char *, ...); -#define rds_conn_error(conn, fmt...) \ - __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) __printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fcddacc92e01..3296a6ac583a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void rds_tcp_tc_info(struct socket *sock, unsigned int len, +static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { @@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, unsigned long flags; struct sockaddr_in sin; int sinlen; + struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); - tsinfo.local_addr = sin.sin_addr.s_addr; - tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); - tsinfo.peer_addr = sin.sin_addr.s_addr; - tsinfo.peer_port = sin.sin_port; + sock = tc->t_sock; + if (sock) { + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + } tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index e0b23fb5b8d5..c9c496844cd7 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -103,7 +103,7 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) if (!peer_is_smaller) return NULL; - for (i = 1; i < npaths; i++) { + for (i = 0; i < npaths; i++) { struct rds_conn_path *cp = &conn->c_path[i]; if (rds_conn_path_transition(cp, RDS_CONN_DOWN, diff --git a/net/rds/threads.c b/net/rds/threads.c index e42df11bf30a..e36e333a0aa0 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work) RDS_CONN_DOWN)) rds_queue_reconnect(cp); else - rds_conn_path_error(cp, - "RDS: connect failed\n"); + rds_conn_path_error(cp, "connect failed\n"); } } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1d87b5453ef7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk) ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(udp_sk, 0, 1, &ret); + skb = skb_recv_udp(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk) __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); - /* The socket buffer we have is owned by UDP, with UDP's data all over - * it, but we really want our own data there. + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. */ - skb_orphan(skb); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d3960033f61..9ff06cfbcdec 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -226,9 +226,7 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) return PTR_ERR(fp); if (tb[TCA_ACT_BPF_NAME]) { - name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), - nla_len(tb[TCA_ACT_BPF_NAME]), - GFP_KERNEL); + name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 378c1c976058..ce7ea6c1c50d 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; + struct nf_hook_state state = { + .net = dev_net(skb->dev), + .in = skb->dev, + .hook = ipt->tcfi_hook, + .pf = NFPROTO_IPV4, + }; if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; @@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ - par.net = dev_net(skb->dev); - par.in = skb->dev; - par.out = NULL; - par.hooknum = ipt->tcfi_hook; + par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; - par.family = NFPROTO_IPV4; ret = par.target->target(skb, &par); switch (ret) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6b07fba5770b..6073a1132725 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,25 @@ static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); +static bool tcf_mirred_is_act_redirect(int action) +{ + return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; +} + +static u32 tcf_mirred_act_direction(int action) +{ + switch (action) { + case TCA_EGRESS_REDIR: + case TCA_EGRESS_MIRROR: + return AT_EGRESS; + case TCA_INGRESS_REDIR: + case TCA_INGRESS_MIRROR: + return AT_INGRESS; + default: + BUG(); + } +} + static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); @@ -54,17 +73,32 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { static int mirred_net_id; static struct tc_action_ops act_mirred_ops; +static bool dev_is_mac_header_xmit(const struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + } + return true; +} + static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; bool exists = false; + int ret; if (nla == NULL) return -EINVAL; @@ -82,6 +116,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: + case TCA_INGRESS_REDIR: + case TCA_INGRESS_MIRROR: break; default: if (exists) @@ -95,19 +131,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_hash_release(*a, bind); return -ENODEV; } - switch (dev->type) { - case ARPHRD_TUNNEL: - case ARPHRD_TUNNEL6: - case ARPHRD_SIT: - case ARPHRD_IPGRE: - case ARPHRD_VOID: - case ARPHRD_NONE: - ok_push = 0; - break; - default: - ok_push = 1; - break; - } + mac_header_xmit = dev_is_mac_header_xmit(dev); } else { dev = NULL; } @@ -136,7 +160,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); rcu_assign_pointer(m->tcfm_dev, dev); - m->tcfm_ok_push = ok_push; + m->tcfm_mac_header_xmit = mac_header_xmit; } if (ret == ACT_P_CREATED) { @@ -153,15 +177,20 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); + bool m_mac_header_xmit; struct net_device *dev; struct sk_buff *skb2; - int retval, err; + int retval, err = 0; + int m_eaction; + int mac_len; u32 at; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); rcu_read_lock(); + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); retval = READ_ONCE(m->tcf_action); dev = rcu_dereference(m->tcfm_dev); if (unlikely(!dev)) { @@ -180,23 +209,36 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!skb2) goto out; - if (!(at & AT_EGRESS)) { - if (m->tcfm_ok_push) + /* If action's target direction differs than filter's direction, + * and devices expect a mac header on xmit, then mac push/pull is + * needed. + */ + if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) { + if (at & AT_EGRESS) { + /* caught at egress, act ingress: pull mac */ + mac_len = skb_network_header(skb) - skb_mac_header(skb); + skb_pull_rcsum(skb2, mac_len); + } else { + /* caught at ingress, act egress: push mac */ skb_push_rcsum(skb2, skb->mac_len); + } } /* mirror is always swallowed */ - if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + if (tcf_mirred_is_act_redirect(m_eaction)) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - err = dev_queue_xmit(skb2); + if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS) + err = dev_queue_xmit(skb2); + else + err = netif_receive_skb(skb2); if (err) { out: qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); - if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + if (tcf_mirred_is_act_redirect(m_eaction)) retval = TC_ACT_SHOT; } rcu_read_unlock(); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index a133dcb82132..024f3a3afeff 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); - if (d->flags & SKBEDIT_F_MARK) - skb->mark = d->mark; + if (d->flags & SKBEDIT_F_MARK) { + skb->mark &= ~d->mask; + skb->mark |= d->mark & d->mask; + } if (d->flags & SKBEDIT_F_PTYPE) skb->pkt_type = d->ptype; @@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, + [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; struct tcf_skbedit *d; - u32 flags = 0, *priority = NULL, *mark = NULL; + u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; bool exists = false; int ret = 0, err; @@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } + if (tb[TCA_SKBEDIT_MASK] != NULL) { + flags |= SKBEDIT_F_MASK; + mask = nla_data(tb[TCA_SKBEDIT_MASK]); + } + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); exists = tcf_hash_check(tn, parm->index, a, bind); @@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d->mark = *mark; if (flags & SKBEDIT_F_PTYPE) d->ptype = *ptype; + /* default behaviour is to use all the bits */ + d->mask = 0xffffffff; + if (flags & SKBEDIT_F_MASK) + d->mask = *mask; d->tcf_action = parm->action; @@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if ((d->flags & SKBEDIT_F_PTYPE) && nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) goto nla_put_failure; + if ((d->flags & SKBEDIT_F_MASK) && + nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask)) + goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af47bdf2f483..edc720f11687 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -16,7 +16,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> -#include <net/dst_metadata.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> @@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; + __be16 dst_port = 0; __be64 key_id; int ret = 0; int err; @@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) + dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; @@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; @@ -130,7 +135,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } if (!metadata) { @@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || tunnel_key_dump_addresses(skb, - ¶ms->tcft_enc_metadata->u.tun_info)) + ¶ms->tcft_enc_metadata->u.tun_info) || + nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) goto nla_put_failure; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bb1d5a487081..52dc85acca7d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -369,9 +369,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, return PTR_ERR(fp); if (tb[TCA_BPF_NAME]) { - name = kmemdup(nla_data(tb[TCA_BPF_NAME]), - nla_len(tb[TCA_BPF_NAME]), - GFP_KERNEL); + name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6f40fba599b..e8dd09af0d0c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -43,6 +43,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv4_addrs enc_ipv4; struct flow_dissector_key_ipv6_addrs enc_ipv6; }; + struct flow_dissector_key_ports enc_tp; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -155,6 +156,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, } skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + skb_key.enc_tp.src = key->tp_src; + skb_key.enc_tp.dst = key->tp_dst; } skb_key.indev_ifindex = skb->skb_iif; @@ -269,6 +272,14 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } +static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) +{ + list_del_rcu(&f->list); + fl_hw_destroy_filter(tp, (unsigned long)f); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -277,11 +288,8 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) if (!force && !list_empty(&head->filters)) return false; - list_for_each_entry_safe(f, next, &head->filters, list) { - fl_hw_destroy_filter(tp, (unsigned long)f); - list_del_rcu(&f->list); - call_rcu(&f->rcu, fl_destroy_filter); - } + list_for_each_entry_safe(f, next, &head->filters, list) + __fl_delete(tp, f); RCU_INIT_POINTER(tp->root, NULL); if (head->mask_assigned) rhashtable_destroy(&head->ht); @@ -339,6 +347,14 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -448,6 +464,13 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_SCTP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)); } if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || @@ -484,6 +507,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); + fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)); + + fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst)); + return 0; } @@ -551,6 +582,18 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6); + if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) || + FL_KEY_IS_MASKED(&mask->key, enc_ipv6)) + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, + enc_control); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -741,10 +784,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); - list_del_rcu(&f->list); - fl_hw_destroy_filter(tp, (unsigned long)f); - tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + __fl_delete(tp, f); return 0; } @@ -895,6 +935,14 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_SCTP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)))) + goto nla_put_failure; if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, @@ -920,7 +968,17 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, &mask->enc_key_id, TCA_FLOWER_UNSPEC, - sizeof(key->enc_key_id))) + sizeof(key->enc_key_id)) || + fl_dump_key_val(skb, &key->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)) || + fl_dump_key_val(skb, &key->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst))) goto nla_put_failure; nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index c66ca9400ab4..c1b23e3060b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct xt_action_param acpar; const struct xt_set_info *set = (const void *) em->data; struct net_device *dev, *indev = NULL; + struct nf_hook_state state = { + .net = em->net, + }; int ret, network_offset; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - acpar.family = NFPROTO_IPV4; + state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; acpar.thoff = ip_hdrlen(skb); break; case htons(ETH_P_IPV6): - acpar.family = NFPROTO_IPV6; + state.pf = NFPROTO_IPV6; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) return 0; /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ @@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, return 0; } - acpar.hooknum = 0; - - opt.family = acpar.family; + opt.family = state.pf; opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; @@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); - acpar.net = em->net; - acpar.in = indev ? indev : dev; - acpar.out = dev; + state.in = indev ? indev : dev; + state.out = dev; + acpar.state = &state; ret = ip_set_test(set->index, skb, &acpar, &opt); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a309a07ccb35..41c80b6c3906 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = skb_vlan_tag_get(skb); - if (!tag && __vlan_get_tag(skb, &tag)) - *err = -1; - else + if (skb_vlan_tag_present(skb)) + dst->value = skb_vlan_tag_get(skb); + else if (!__vlan_get_tag(skb, &tag)) dst->value = tag; + else + *err = -1; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 206dc24add3a..f337f1bdd1d4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->handle = handle; + /* This exist to keep backward compatible with a userspace + * loophole, what allowed userspace to get IFF_NO_QUEUE + * facility on older kernels by setting tx_queue_len=0 (prior + * to qdisc init), and then forgot to reinit tx_queue_len + * before again attaching a qdisc. + */ + if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); + } + if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c798d0de8a9d..9926fe4f3b6f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 2cd9b4478b92..b0196366d58d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -418,9 +418,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) struct teql_master *m = netdev_priv(dev); struct Qdisc *q; - if (new_mtu < 68) - return -EINVAL; - q = m->slaves; if (q) { do { @@ -460,6 +457,8 @@ static __init void teql_master_setup(struct net_device *dev) dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; + dev->min_mtu = 68; + dev->max_mtu = 65535; dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7a1cdf43e49d..615f0ddd41df 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) atomic_set(&msg->refcnt, 1); msg->send_failed = 0; msg->send_error = 0; - msg->can_abandon = 0; msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); @@ -182,20 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* Note: Calculate this outside of the loop, so that all fragments * have the same expiration. */ - if (sinfo->sinfo_timetolive) { - /* sinfo_timetolive is in milliseconds */ + if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive && + (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) || + !SCTP_PR_POLICY(sinfo->sinfo_flags))) msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - msg->can_abandon = 1; - - pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, - msg, msg->expires_at, jiffies); - } - - if (asoc->peer.prsctp_capable && - SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - msg->expires_at = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); /* This is the biggest possible DATA chunk that can fit into * the packet @@ -354,18 +344,8 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->peer.prsctp_capable || - !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { - struct sctp_datamsg *msg = chunk->msg; - - if (!msg->can_abandon) - return 0; - - if (time_after(jiffies, msg->expires_at)) - return 1; - + if (!chunk->asoc->peer.prsctp_capable) return 0; - } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { @@ -378,6 +358,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; + } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && + chunk->msg->expires_at && + time_after(jiffies, chunk->msg->expires_at)) { + return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 6cb0df859195..f5320a87341e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -399,186 +399,72 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) atomic_inc(&sk->sk_wmem_alloc); } -/* All packets are sent to the network through this function from - * sctp_outq_tail(). - * - * The return value is a normal kernel error return value. - */ -int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) +static int sctp_packet_pack(struct sctp_packet *packet, + struct sk_buff *head, int gso, gfp_t gfp) { struct sctp_transport *tp = packet->transport; - struct sctp_association *asoc = tp->asoc; - struct sctphdr *sh; - struct sk_buff *nskb = NULL, *head = NULL; + struct sctp_auth_chunk *auth = NULL; struct sctp_chunk *chunk, *tmp; - struct sock *sk; - int err = 0; - int padding; /* How much padding do we need? */ - int pkt_size; - __u8 has_data = 0; - int gso = 0; - int pktcount = 0; + int pkt_count = 0, pkt_size; + struct sock *sk = head->sk; + struct sk_buff *nskb; int auth_len = 0; - struct dst_entry *dst; - unsigned char *auth = NULL; /* pointer to auth in skb data */ - - pr_debug("%s: packet:%p\n", __func__, packet); - - /* Do NOT generate a chunkless packet. */ - if (list_empty(&packet->chunk_list)) - return err; - - /* Set up convenience variables... */ - chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); - sk = chunk->skb->sk; - /* Allocate the head skb, or main one if not in GSO */ - if (packet->size > tp->pathmtu && !packet->ipfragok) { - if (sk_can_gso(sk)) { - gso = 1; - pkt_size = packet->overhead; - } else { - /* If this happens, we trash this packet and try - * to build a new one, hopefully correct this - * time. Application may notice this error. - */ - pr_err_once("Trying to GSO but underlying device doesn't support it."); - goto err; - } - } else { - pkt_size = packet->size; - } - head = alloc_skb(pkt_size + MAX_HEADER, gfp); - if (!head) - goto err; if (gso) { - NAPI_GRO_CB(head)->last = head; skb_shinfo(head)->gso_type = sk->sk_gso_type; + NAPI_GRO_CB(head)->last = head; + } else { + nskb = head; + pkt_size = packet->size; + goto merge; } - /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(head, packet->overhead + MAX_HEADER); - - /* Set the owning socket so that we know where to get the - * destination IP address. - */ - sctp_packet_set_owner_w(head, sk); - - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { - sctp_assoc_sync_pmtu(sk, asoc); - } - } - dst = dst_clone(tp->dst); - if (!dst) { - if (asoc) - IP_INC_STATS(sock_net(asoc->base.sk), - IPSTATS_MIB_OUTNOROUTES); - goto nodst; - } - skb_dst_set(head, dst); - - /* Build the SCTP header. */ - sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); - skb_reset_transport_header(head); - sh->source = htons(packet->source_port); - sh->dest = htons(packet->destination_port); - - /* From 6.8 Adler-32 Checksum Calculation: - * After the packet is constructed (containing the SCTP common - * header and one or more control or DATA chunks), the - * transmitter shall: - * - * 1) Fill in the proper Verification Tag in the SCTP common - * header and initialize the checksum field to 0's. - */ - sh->vtag = htonl(packet->vtag); - sh->checksum = 0; - - pr_debug("***sctp_transmit_packet***\n"); - do { - /* Set up convenience variables... */ - chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); - pktcount++; - - /* Calculate packet size, so it fits in PMTU. Leave - * other chunks for the next packets. - */ - if (gso) { - pkt_size = packet->overhead; - list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = SCTP_PAD4(chunk->skb->len); - - if (chunk == packet->auth) - auth_len = padded; - else if (auth_len + padded + packet->overhead > - tp->pathmtu) - goto nomem; - else if (pkt_size + padded > tp->pathmtu) - break; - pkt_size += padded; - } + /* calculate the pkt_size and alloc nskb */ + pkt_size = packet->overhead; + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, + list) { + int padded = SCTP_PAD4(chunk->skb->len); - /* Allocate a new skb. */ - nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); - if (!nskb) - goto nomem; - - /* Make sure the outbound skb has enough header - * room reserved. - */ - skb_reserve(nskb, packet->overhead + MAX_HEADER); - } else { - nskb = head; + if (chunk == packet->auth) + auth_len = padded; + else if (auth_len + padded + packet->overhead > + tp->pathmtu) + return 0; + else if (pkt_size + padded > tp->pathmtu) + break; + pkt_size += padded; } + nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); + if (!nskb) + return 0; + skb_reserve(nskb, packet->overhead + MAX_HEADER); - /** - * 3.2 Chunk Field Descriptions - * - * The total length of a chunk (including Type, Length and - * Value fields) MUST be a multiple of 4 bytes. If the length - * of the chunk is not a multiple of 4 bytes, the sender MUST - * pad the chunk with all zero bytes and this padding is not - * included in the chunk length field. The sender should - * never pad with more than 3 bytes. - * - * [This whole comment explains SCTP_PAD4() below.] - */ - +merge: + /* merge chunks into nskb and append nskb into head list */ pkt_size -= packet->overhead; list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { + int padding; + list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - - if (!chunk->resent && !tp->rto_pending) { + if (!sctp_chunk_retransmitted(chunk) && + !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } - - has_data = 1; } padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); - /* if this is the auth chunk that we are adding, - * store pointer where it will be added and put - * the auth into the packet. - */ if (chunk == packet->auth) - auth = skb_tail_pointer(nskb); + auth = (struct sctp_auth_chunk *) + skb_tail_pointer(nskb); - memcpy(skb_put(nskb, chunk->skb->len), - chunk->skb->data, chunk->skb->len); + memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, + chunk->skb->len); pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", chunk, @@ -588,11 +474,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ntohs(chunk->chunk_hdr->length), chunk->skb->len, chunk->rtt_in_progress); - /* If this is a control chunk, this is our last - * reference. Free data chunks after they've been - * acknowledged or have failed. - * Re-queue auth chunks if needed. - */ pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) @@ -602,160 +483,161 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) break; } - /* SCTP-AUTH, Section 6.2 - * The sender MUST calculate the MAC as described in RFC2104 [2] - * using the hash function H as described by the MAC Identifier and - * the shared association key K based on the endpoint pair shared key - * described by the shared key identifier. The 'data' used for the - * computation of the AUTH-chunk is given by the AUTH chunk with its - * HMAC field set to zero (as shown in Figure 6) followed by all - * chunks that are placed after the AUTH chunk in the SCTP packet. - */ - if (auth) - sctp_auth_calculate_hmac(asoc, nskb, - (struct sctp_auth_chunk *)auth, - gfp); - - if (packet->auth) { - if (!list_empty(&packet->chunk_list)) { - /* We will generate more packets, so re-queue - * auth chunk. - */ + if (auth) { + sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp); + /* free auth if no more chunks, or add it back */ + if (list_empty(&packet->chunk_list)) + sctp_chunk_free(packet->auth); + else list_add(&packet->auth->list, &packet->chunk_list); - } else { - sctp_chunk_free(packet->auth); - packet->auth = NULL; - } } - if (!gso) - break; - - if (skb_gro_receive(&head, nskb)) { - kfree_skb(nskb); - goto nomem; + if (gso) { + if (skb_gro_receive(&head, nskb)) { + kfree_skb(nskb); + return 0; + } + if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= + sk->sk_gso_max_segs)) + return 0; } - nskb = NULL; - if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= - sk->sk_gso_max_segs)) - goto nomem; + + pkt_count++; } while (!list_empty(&packet->chunk_list)); - /* 2) Calculate the Adler-32 checksum of the whole packet, - * including the SCTP common header and all the - * chunks. - * - * Note: Adler-32 is no longer applicable, as has been replaced - * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. - * - * If it's a GSO packet, it's postponed to sctp_skb_segment. - */ - if (!sctp_checksum_disable || gso) { - if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) || - dst_xfrm(dst) || packet->ipfragok)) { - sh->checksum = sctp_compute_cksum(head, 0); - } else { - /* no need to seed pseudo checksum for SCTP */ - head->ip_summed = CHECKSUM_PARTIAL; - head->csum_start = skb_transport_header(head) - head->head; - head->csum_offset = offsetof(struct sctphdr, checksum); + if (gso) { + memset(head->cb, 0, max(sizeof(struct inet_skb_parm), + sizeof(struct inet6_skb_parm))); + skb_shinfo(head)->gso_segs = pkt_count; + skb_shinfo(head)->gso_size = GSO_BY_FRAGS; + rcu_read_lock(); + if (skb_dst(head) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); } + rcu_read_unlock(); + goto chksum; } - /* IP layer ECN support - * From RFC 2481 - * "The ECN-Capable Transport (ECT) bit would be set by the - * data sender to indicate that the end-points of the - * transport protocol are ECN-capable." - * - * Now setting the ECT bit all the time, as it should not cause - * any problems protocol-wise even if our peer ignores it. - * - * Note: The works for IPv6 layer checks this bit too later - * in transmission. See IP6_ECN_flow_xmit(). - */ - tp->af_specific->ecn_capable(sk); + if (sctp_checksum_disable) + return 1; - /* Set up the IP options. */ - /* BUG: not implemented - * For v4 this all lives somewhere in sk->sk_opt... - */ + if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) || + dst_xfrm(skb_dst(head)) || packet->ipfragok) { + struct sctphdr *sh = + (struct sctphdr *)skb_transport_header(head); - /* Dump that on IP! */ - if (asoc) { - asoc->stats.opackets += pktcount; - if (asoc->peer.last_sent_to != tp) - /* Considering the multiple CPU scenario, this is a - * "correcter" place for last_sent_to. --xguo - */ - asoc->peer.last_sent_to = tp; + sh->checksum = sctp_compute_cksum(head, 0); + } else { +chksum: + head->ip_summed = CHECKSUM_PARTIAL; + head->csum_start = skb_transport_header(head) - head->head; + head->csum_offset = offsetof(struct sctphdr, checksum); } - if (has_data) { - struct timer_list *timer; - unsigned long timeout; + return pkt_count; +} + +/* All packets are sent to the network through this function from + * sctp_outq_tail(). + * + * The return value is always 0 for now. + */ +int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) +{ + struct sctp_transport *tp = packet->transport; + struct sctp_association *asoc = tp->asoc; + struct sctp_chunk *chunk, *tmp; + int pkt_count, gso = 0; + struct dst_entry *dst; + struct sk_buff *head; + struct sctphdr *sh; + struct sock *sk; - /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { - timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; - timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; + pr_debug("%s: packet:%p\n", __func__, packet); + if (list_empty(&packet->chunk_list)) + return 0; + chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); + sk = chunk->skb->sk; - if (!mod_timer(timer, jiffies + timeout)) - sctp_association_hold(asoc); + /* check gso */ + if (packet->size > tp->pathmtu && !packet->ipfragok) { + if (!sk_can_gso(sk)) { + pr_err_once("Trying to GSO but underlying device doesn't support it."); + goto out; } + gso = 1; + } + + /* alloc head skb */ + head = alloc_skb((gso ? packet->overhead : packet->size) + + MAX_HEADER, gfp); + if (!head) + goto out; + skb_reserve(head, packet->overhead + MAX_HEADER); + sctp_packet_set_owner_w(head, sk); + + /* set sctp header */ + sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); + skb_reset_transport_header(head); + sh->source = htons(packet->source_port); + sh->dest = htons(packet->destination_port); + sh->vtag = htonl(packet->vtag); + sh->checksum = 0; + + /* update dst if in need */ + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); } + dst = dst_clone(tp->dst); + if (!dst) { + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(head); + goto out; + } + skb_dst_set(head, dst); + /* pack up chunks */ + pkt_count = sctp_packet_pack(packet, head, gso, gfp); + if (!pkt_count) { + kfree_skb(head); + goto out; + } pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); - if (gso) { - /* Cleanup our debris for IP stacks */ - memset(head->cb, 0, max(sizeof(struct inet_skb_parm), - sizeof(struct inet6_skb_parm))); + /* start autoclose timer */ + if (packet->has_data && sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { + struct timer_list *timer = + &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; + unsigned long timeout = + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; - skb_shinfo(head)->gso_segs = pktcount; - skb_shinfo(head)->gso_size = GSO_BY_FRAGS; + if (!mod_timer(timer, jiffies + timeout)) + sctp_association_hold(asoc); + } - /* We have to refresh this in case we are xmiting to - * more than one transport at a time - */ - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - rcu_read_unlock(); + /* sctp xmit */ + tp->af_specific->ecn_capable(sk); + if (asoc) { + asoc->stats.opackets += pkt_count; + if (asoc->peer.last_sent_to != tp) + asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(head, tp); - goto out; - -nomem: - if (packet->auth && list_empty(&packet->auth->list)) - sctp_chunk_free(packet->auth); - -nodst: - /* FIXME: Returning the 'err' will effect all the associations - * associated with a socket, although only one of the paths of the - * association is unreachable. - * The real failure of a transport or association can be passed on - * to the user via notifications. So setting this error may not be - * required. - */ - /* err = -EHOSTUNREACH; */ - kfree_skb(head); -err: +out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } - -out: sctp_packet_reset(packet); - return err; + return 0; } /******************************************************************** @@ -871,9 +753,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet, rwnd = 0; asoc->peer.rwnd = rwnd; - /* Has been accepted for transmission. */ - if (!asoc->peer.prsctp_capable) - chunk->msg->can_abandon = 0; sctp_chunk_assign_tsn(chunk); sctp_chunk_assign_ssn(chunk); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 582585393d35..e54082699520 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } - chunk->resent = 1; - /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && + !sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; diff --git a/net/socket.c b/net/socket.c index 272518b087c8..f9e26c68c3cf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -518,8 +518,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** @@ -877,6 +891,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +static struct ns_common *get_net_ns(struct ns_common *ns) +{ + return &get_net(container_of(ns, struct net, ns))->ns; +} + static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; @@ -945,6 +964,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = dlci_ioctl_hook(cmd, argp); mutex_unlock(&dlci_ioctl_mutex); break; + case SIOCGSKNS: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + err = open_related_ns(&net->ns, get_net_ns); + break; default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -3095,6 +3121,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFVLAN: case SIOCADDDLCI: case SIOCDELDLCI: + case SIOCGSKNS: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..78da4aee3543 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -39,6 +39,7 @@ #include <net/checksum.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/udp.h> #include <net/tcp.h> #include <net/tcp_states.h> #include <asm/uaccess.h> @@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp) } } +static void svc_release_udp_skb(struct svc_rqst *rqstp) +{ + struct sk_buff *skb = rqstp->rq_xprt_ctxt; + + if (skb) { + rqstp->rq_xprt_ctxt = NULL; + + dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + consume_skb(skb); + } +} + union svc_pktinfo_u { struct in_pktinfo pkti; struct in6_pktinfo pkti6; @@ -534,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err >= 0) - skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); if (skb == NULL) { if (err != -EAGAIN) { @@ -575,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) goto out_free; } local_bh_enable(); - skb_free_datagram_locked(svsk->sk_sk, skb); + consume_skb(skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data; @@ -602,8 +615,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return len; out_free: - trace_kfree_skb(skb, svc_udp_recvfrom); - skb_free_datagram_locked(svsk->sk_sk, skb); + kfree_skb(skb); return 0; } @@ -660,7 +672,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_release_udp_skb, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e01c825bc683..af392d9b9cec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1080,10 +1080,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_udp(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); - skb_free_datagram_locked(sk, skb); + consume_skb(skb); continue; } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 3b95fe980fa2..017801f9dbaa 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -624,13 +624,10 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info) { - int err; - ASSERT_RTNL(); info->dev = dev; - err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); - return err; + return raw_notifier_call_chain(&switchdev_notif_chain, val, info); } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 50a739860d37..8d408612ffa4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -95,7 +95,7 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - void *handle; + u32 bytes_read; struct sk_buff *tail; bool validated; bool wakeup_pending; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3200059d14b2..26ca8dd64ded 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. */ -struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_V2_NAME, - .version = TIPC_GENL_V2_VERSION, - .hdrsize = 0, - .maxattr = TIPC_NLA_MAX, - .netnsok = true, -}; - static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, @@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #endif }; +struct genl_family tipc_genl_family __ro_after_init = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), +}; + int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_family.attrbuf; + *attr = genl_family_attrbuf(&tipc_genl_family); if (!*attr) return -EOPNOTSUPP; return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); } -int tipc_netlink_start(void) +int __init tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, - tipc_genl_v2_ops); + res = genl_register_family(&tipc_genl_family); if (res) { pr_err("Failed to register netlink interface\n"); return res; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1fd464764765..e1ae8a8a2b8e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1215,15 +1215,6 @@ send: return err; } -static struct genl_family tipc_genl_compat_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, - .netnsok = true, -}; - static struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, @@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = { }, }; -int tipc_netlink_compat_start(void) +static struct genl_family tipc_genl_compat_family __ro_after_init = { + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_compat_ops, + .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), +}; + +int __init tipc_netlink_compat_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_compat_family, - tipc_genl_compat_ops); + res = genl_register_family(&tipc_genl_compat_family); if (res) { pr_err("Failed to register legacy compat interface\n"); return res; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..22d92f0ec5ac 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -44,44 +44,43 @@ #include "bcast.h" #include "netlink.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ - #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 +enum { + TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_OPEN = TCP_CLOSE, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, + TIPC_CONNECTING = TCP_SYN_SENT, +}; + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API - * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer - * @remote: 'connected' peer for dgram/rdm + * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; - int connected; u32 conn_type; u32 conn_instance; int published; @@ -91,17 +90,16 @@ struct tipc_sock { struct list_head sock_list; struct list_head publications; u32 pub_count; - u32 probing_state; - unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; + bool probe_unacked; bool link_cong; u16 snt_unacked; u16 snd_win; u16 peer_caps; u16 rcv_unacked; u16 rcv_win; - struct sockaddr_tipc remote; + struct sockaddr_tipc peer; struct rhash_head node; struct rcu_head rcu; }; @@ -294,6 +292,21 @@ static void tsk_rej_rx_queue(struct sock *sk) tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } +static bool tipc_sk_connected(struct sock *sk) +{ + return sk->sk_state == TIPC_ESTABLISHED; +} + +/* tipc_sk_type_connectionless - check if the socket is datagram socket + * @sk: socket + * + * Returns true if connection less, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -301,12 +314,13 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { - struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; - if (unlikely(!tsk->connected)) + if (unlikely(!tipc_sk_connected(sk))) return false; if (unlikely(msg_origport(msg) != peer_port)) @@ -327,6 +341,45 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) return false; } +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldsk_state = sk->sk_state; + int res = -EINVAL; + + switch (state) { + case TIPC_OPEN: + res = 0; + break; + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_DISCONNECTING: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_ESTABLISHED) + res = 0; + break; + } + + if (!res) + sk->sk_state = state; + + return res; +} + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -344,7 +397,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, { struct tipc_net *tn; const struct proto_ops *ops; - socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; @@ -356,16 +408,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, switch (sock->type) { case SOCK_STREAM: ops = &stream_ops; - state = SS_UNCONNECTED; break; case SOCK_SEQPACKET: ops = &packet_ops; - state = SS_UNCONNECTED; break; case SOCK_DGRAM: case SOCK_RDM: ops = &msg_ops; - state = SS_READY; break; default: return -EPROTOTYPE; @@ -386,14 +435,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock, /* Finish initializing socket data structures */ sock->ops = ops; - sock->state = state; sock_init_data(sock, sk); + tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -406,11 +456,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); tsk->rcv_win = tsk->snd_win; - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); } + return 0; } @@ -421,6 +472,44 @@ static void tipc_sk_callback(struct rcu_head *head) sock_put(&tsk->sk); } +/* Caller should hold socket lock for the socket. */ +static void __tipc_shutdown(struct socket *sock, int error) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + u32 dnode = tsk_peer_node(tsk); + struct sk_buff *skb; + + /* Reject all unreceived messages, except on an active connection + * (which disconnects locally & sends a 'FIN+' to peer). + */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { + kfree_skb(skb); + } else { + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + tipc_sk_respond(sk, skb, error); + } + } + if (sk->sk_state != TIPC_DISCONNECTING) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, error); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (!tipc_sk_type_connectionless(sk)) { + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + } + } +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -440,10 +529,7 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net; struct tipc_sock *tsk; - struct sk_buff *skb; - u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -452,47 +538,16 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; - net = sock_net(sk); tsk = tipc_sk(sk); lock_sock(sk); - /* - * Reject all unreceived messages, except on an active connection - * (which disconnects locally & sends a 'FIN+' to peer) - */ - dnode = tsk_peer_node(tsk); - while (sock->state != SS_DISCONNECTING) { - skb = __skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - break; - if (TIPC_SKB_CB(skb)->handle != NULL) - kfree_skb(skb); - else { - if ((sock->state == SS_CONNECTING) || - (sock->state == SS_CONNECTED)) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); - } - } - + __tipc_shutdown(sock, TIPC_ERR_NO_PORT); + sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); - if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, - tsk_own_node(tsk), tsk_peer_port(tsk), - tsk->portid, TIPC_ERR_NO_PORT); - if (skb) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); - } /* Reject any messages that accumulated in backlog queue */ - sock->state = SS_DISCONNECTING; release_sock(sk); call_rcu(&tsk->rcu, tipc_sk_callback); @@ -578,13 +633,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sock->sk); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { - if ((sock->state != SS_CONNECTED) && - ((peer != 2) || (sock->state != SS_DISCONNECTING))) + if ((!tipc_sk_connected(sk)) && + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); @@ -616,28 +672,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * exits. TCP and other protocols seem to rely on higher level poll routines * to handle any preventable race conditions, so TIPC will do the same ... * - * TIPC sets the returned events as follows: - * - * socket state flags set - * ------------ --------- - * unconnected no read flags - * POLLOUT if port is not congested - * - * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue - * no write flags - * - * connected POLLIN/POLLRDNORM if data in rx queue - * POLLOUT if port is not congested - * - * disconnecting POLLIN/POLLRDNORM/POLLHUP - * no write flags - * - * listening POLLIN if SYN in rx queue - * no write flags - * - * ready POLLIN/POLLRDNORM if data in rx queue - * [connectionless] POLLOUT (since port cannot be congested) - * * IMPORTANT: The fact that a read or write operation is indicated does NOT * imply that the operation will succeed, merely that it should be performed * and will not block. @@ -651,22 +685,29 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - switch ((int)sock->state) { - case SS_UNCONNECTED: - if (!tsk->link_cong) - mask |= POLLOUT; - break; - case SS_READY: - case SS_CONNECTED: + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP | POLLIN | POLLRDNORM; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + switch (sk->sk_state) { + case TIPC_ESTABLISHED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ - case SS_CONNECTING: - case SS_LISTENING: + case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; - case SS_DISCONNECTING: + case TIPC_OPEN: + if (!tsk->link_cong) + mask |= POLLOUT; + if (tipc_sk_type_connectionless(sk) && + (!skb_queue_empty(&sk->sk_receive_queue))) + mask |= (POLLIN | POLLRDNORM); + break; + case TIPC_DISCONNECTING: mask = (POLLIN | POLLRDNORM | POLLHUP); break; } @@ -697,6 +738,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, uint mtu; int rc; + if (!timeo && tsk->link_cong) + return -ELINKCONG; + msg_set_type(mhdr, TIPC_MCAST_MSG); msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); msg_set_destport(mhdr, 0); @@ -809,7 +853,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; - tsk->probing_state = TIPC_CONN_OK; + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); @@ -832,25 +876,25 @@ exit: static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - DEFINE_WAIT(wait); int done; do { int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; if (!*timeo_p) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->link_cong); - finish_wait(sk_sleep(sk), &wait); + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } @@ -890,6 +934,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head pktchain; + bool is_connectionless = tipc_sk_type_connectionless(sk); struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -900,18 +945,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (tsk->connected && sock->state == SS_READY) - dest = &tsk->remote; + if (is_connectionless && tsk->peer.family == AF_TIPC) + dest = &tsk->peer; else return -EDESTADDRREQ; } else if (unlikely(m->msg_namelen < sizeof(*dest)) || dest->family != AF_TIPC) { return -EINVAL; } - if (unlikely(sock->state != SS_READY)) { - if (sock->state == SS_LISTENING) + if (!is_connectionless) { + if (sk->sk_state == TIPC_LISTEN) return -EPIPE; - if (sock->state != SS_UNCONNECTED) + if (sk->sk_state != TIPC_OPEN) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; @@ -963,8 +1008,8 @@ new_mtu: TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { - if (sock->state != SS_READY) - sock->state = SS_CONNECTING; + if (!is_connectionless) + tipc_set_sk_state(sk, TIPC_CONNECTING); return dsz; } if (rc == -ELINKCONG) { @@ -986,30 +1031,30 @@ new_mtu: static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - DEFINE_WAIT(wait); int done; do { int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; - else if (sock->state != SS_CONNECTED) + else if (!tipc_sk_connected(sk)) return -ENOTCONN; if (!*timeo_p) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && !tsk_conn_cong(tsk)) || - !tsk->connected); - finish_wait(sk_sleep(sk), &wait); + !tipc_sk_connected(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } @@ -1064,14 +1109,17 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > (uint)INT_MAX) return -EMSGSIZE; - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (unlikely(!tipc_sk_connected(sk))) { + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else return -ENOTCONN; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (!timeo && tsk->link_cong) + return -ELINKCONG; + dnode = tsk_peer_node(tsk); skb_queue_head_init(&pktchain); @@ -1145,10 +1193,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_intv = CONN_PROBING_INTERVAL; - tsk->probing_state = TIPC_CONN_OK; - tsk->connected = 1; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -1256,13 +1302,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); - if (!tsk->connected) + if (!tipc_sk_connected(sk)) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, @@ -1291,7 +1338,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { + if (sk->sk_shutdown & RCV_SHUTDOWN) { err = -ENOTCONN; break; } @@ -1332,6 +1379,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *buf; struct tipc_msg *msg; + bool is_connectionless = tipc_sk_type_connectionless(sk); long timeo; unsigned int sz; u32 err; @@ -1343,7 +1391,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, lock_sock(sk); - if (unlikely(sock->state == SS_UNCONNECTED)) { + if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) { res = -ENOTCONN; goto exit; } @@ -1388,8 +1436,8 @@ restart: goto exit; res = sz; } else { - if ((sock->state == SS_READY) || - ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)) + if (is_connectionless || err == TIPC_CONN_SHUTDOWN || + m->msg_control) res = 0; else res = -ECONNRESET; @@ -1398,7 +1446,7 @@ restart: if (unlikely(flags & MSG_PEEK)) goto exit; - if (likely(sock->state != SS_READY)) { + if (likely(!is_connectionless)) { tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); @@ -1429,7 +1477,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, struct tipc_msg *msg; long timeo; unsigned int sz; - int sz_to_copy, target, needed; + int target; int sz_copied = 0; u32 err; int res = 0, hlen; @@ -1440,7 +1488,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, lock_sock(sk); - if (unlikely(sock->state == SS_UNCONNECTED)) { + if (unlikely(sk->sk_state == TIPC_OPEN)) { res = -ENOTCONN; goto exit; } @@ -1477,11 +1525,13 @@ restart: /* Capture message data (if valid) & compute return value (always) */ if (!err) { - u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + u32 offset = TIPC_SKB_CB(buf)->bytes_read; + u32 needed; + int sz_to_copy; sz -= offset; needed = (buf_len - sz_copied); - sz_to_copy = (sz <= needed) ? sz : needed; + sz_to_copy = min(sz, needed); res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) @@ -1491,8 +1541,8 @@ restart: if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = - (void *)(unsigned long)(offset + sz_to_copy); + TIPC_SKB_CB(buf)->bytes_read = + offset + sz_to_copy; goto exit; } } else { @@ -1574,49 +1624,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct socket *sock = sk->sk_socket; struct tipc_msg *hdr = buf_msg(skb); if (unlikely(msg_mcast(hdr))) return false; - switch ((int)sock->state) { - case SS_CONNECTED: - - /* Accept only connection-based messages sent by peer */ - if (unlikely(!tsk_peer_msg(tsk, hdr))) - return false; - - if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - /* Let timer expire on it's own */ - tipc_node_remove_conn(net, tsk_peer_node(tsk), - tsk->portid); - } - return true; - - case SS_CONNECTING: - + switch (sk->sk_state) { + case TIPC_CONNECTING: /* Accept only ACK or NACK message */ if (unlikely(!msg_connected(hdr))) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; return true; } if (unlikely(!msg_isdata(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; return true; } tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr)); msg_set_importance(&tsk->phdr, msg_importance(hdr)); - sock->state = SS_CONNECTED; /* If 'ACK+' message, add to socket receive queue */ if (msg_data_sz(hdr)) @@ -1630,18 +1662,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) msg_set_dest_droppable(hdr, 1); return false; - case SS_LISTENING: - case SS_UNCONNECTED: - + case TIPC_OPEN: + case TIPC_DISCONNECTING: + break; + case TIPC_LISTEN: /* Accept only SYN message */ if (!msg_connected(hdr) && !(msg_errcode(hdr))) return true; break; - case SS_DISCONNECTING: - break; + case TIPC_ESTABLISHED: + /* Accept only connection-based messages sent by peer */ + if (unlikely(!tsk_peer_msg(tsk, hdr))) + return false; + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + /* Let timer expire on it's own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); + sk->sk_state_change(sk); + } + return true; default: - pr_err("Unknown socket state %u\n", sock->state); + pr_err("Unknown sk_state %u\n", sk->sk_state); } + return false; } @@ -1692,7 +1737,6 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) static bool filter_rcv(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *xmitq) { - struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); unsigned int limit = rcvbuf_limit(sk, skb); @@ -1718,7 +1762,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Reject if wrong message type for current socket state */ - if (unlikely(sock->state == SS_READY)) { + if (tipc_sk_type_connectionless(sk)) { if (msg_connected(hdr)) { err = TIPC_ERR_NO_PORT; goto reject; @@ -1735,7 +1779,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Enqueue message */ - TIPC_SKB_CB(skb)->handle = NULL; + TIPC_SKB_CB(skb)->bytes_read = 0; __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -1885,8 +1929,8 @@ xmit: static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; - DEFINE_WAIT(wait); int done; do { @@ -1898,9 +1942,10 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); - finish_wait(sk_sleep(sk), &wait); + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, timeo_p, + sk->sk_state != TIPC_CONNECTING, &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } @@ -1922,21 +1967,19 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; - socket_state previous; + int previous; int res = 0; lock_sock(sk); /* DGRAM/RDM connect(), just save the destaddr */ - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { if (dst->family == AF_UNSPEC) { - memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); - tsk->connected = 0; + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { - memcpy(&tsk->remote, dest, destlen); - tsk->connected = 1; + memcpy(&tsk->peer, dest, destlen); } goto exit; } @@ -1952,9 +1995,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, goto exit; } - previous = sock->state; - switch (sock->state) { - case SS_UNCONNECTED: + previous = sk->sk_state; + + switch (sk->sk_state) { + case TIPC_OPEN: /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; @@ -1969,27 +2013,29 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; - /* Just entered SS_CONNECTING state; the only + /* Just entered TIPC_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. */ res = -EINPROGRESS; - case SS_CONNECTING: - if (previous == SS_CONNECTING) - res = -EALREADY; - if (!timeout) + /* fall thru' */ + case TIPC_CONNECTING: + if (!timeout) { + if (previous == TIPC_CONNECTING) + res = -EALREADY; goto exit; + } timeout = msecs_to_jiffies(timeout); /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ res = tipc_wait_for_connect(sock, &timeout); break; - case SS_CONNECTED: + case TIPC_ESTABLISHED: res = -EISCONN; break; default: res = -EINVAL; - break; } + exit: release_sock(sk); return res; @@ -2008,15 +2054,9 @@ static int tipc_listen(struct socket *sock, int len) int res; lock_sock(sk); - - if (sock->state != SS_UNCONNECTED) - res = -EINVAL; - else { - sock->state = SS_LISTENING; - res = 0; - } - + res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); + return res; } @@ -2042,9 +2082,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = -EINVAL; - if (sock->state != SS_LISTENING) - break; err = -EAGAIN; if (!timeo) break; @@ -2075,7 +2112,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) lock_sock(sk); - if (sock->state != SS_LISTENING) { + if (sk->sk_state != TIPC_LISTEN) { res = -EINVAL; goto exit; } @@ -2086,7 +2123,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); @@ -2106,7 +2143,6 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - new_sock->state = SS_CONNECTED; tsk_set_importance(new_tsock, msg_importance(msg)); if (msg_named(msg)) { @@ -2146,13 +2182,6 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *skb; - u32 dnode = tsk_peer_node(tsk); - u32 dport = tsk_peer_port(tsk); - u32 onode = tipc_own_addr(net); - u32 oport = tsk->portid; int res; if (how != SHUT_RDWR) @@ -2160,45 +2189,17 @@ static int tipc_shutdown(struct socket *sock, int how) lock_sock(sk); - switch (sock->state) { - case SS_CONNECTING: - case SS_CONNECTED: - -restart: - dnode = tsk_peer_node(tsk); - - /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ - skb = __skb_dequeue(&sk->sk_receive_queue); - if (skb) { - if (TIPC_SKB_CB(skb)->handle != NULL) { - kfree_skb(skb); - goto restart; - } - tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN); - } else { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, onode, dport, oport, - TIPC_CONN_SHUTDOWN); - if (skb) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - } - tsk->connected = 0; - sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(net, dnode, tsk->portid); - /* fall through */ - - case SS_DISCONNECTING: + __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); + sk->sk_shutdown = SEND_SHUTDOWN; + if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); res = 0; - break; - - default: + } else { res = -ENOTCONN; } @@ -2215,17 +2216,16 @@ static void tipc_sk_timeout(unsigned long data) u32 own_node = tsk_own_node(tsk); bh_lock_sock(sk); - if (!tsk->connected) { + if (!tipc_sk_connected(sk)) { bh_unlock_sock(sk); goto exit; } peer_port = tsk_peer_port(tsk); peer_node = tsk_peer_node(tsk); - if (tsk->probing_state == TIPC_CONN_PROBING) { + if (tsk->probe_unacked) { if (!sock_owned_by_user(sk)) { - sk->sk_socket->state = SS_DISCONNECTING; - tsk->connected = 0; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); @@ -2234,13 +2234,15 @@ static void tipc_sk_timeout(unsigned long data) sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); } - } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + bh_unlock_sock(sk); + goto exit; } + + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); + tsk->probe_unacked = true; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); @@ -2251,11 +2253,12 @@ exit: static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct publication *publ; u32 key; - if (tsk->connected) + if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) @@ -2713,6 +2716,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct nlattr *attrs; struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sock *sk = &tsk->sk; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -2727,7 +2731,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; - if (tsk->connected) { + if (tipc_sk_connected(sk)) { err = __tipc_nl_add_sk_con(skb, tsk); if (err) goto attr_msg_cancel; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5d1c14a2f268..6a705d0ff889 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, + &err, &last); if (skb) break; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a53b3a16b4f1..687e9fdb3d67 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -619,17 +619,17 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) static void virtio_transport_wait_close(struct sock *sk, long timeout) { if (timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); do { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, - sock_flag(sk, SOCK_DONE))) + sock_flag(sk, SOCK_DONE), &wait)) break; } while (!signal_pending(current) && timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } } diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3f816e2971ee..5db731512014 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = { size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); -struct genl_family wimax_gnl_family = { - .id = GENL_ID_GENERATE, +static const struct genl_multicast_group wimax_gnl_mcgrps[] = { + { .name = "msg", }, +}; + +struct genl_family wimax_gnl_family __ro_after_init = { .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, -}; - -static const struct genl_multicast_group wimax_gnl_mcgrps[] = { - { .name = "msg", }, + .module = THIS_MODULE, + .ops = wimax_gnl_ops, + .n_ops = ARRAY_SIZE(wimax_gnl_ops), + .mcgrps = wimax_gnl_mcgrps, + .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps), }; @@ -596,11 +600,7 @@ int __init wimax_subsys_init(void) d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, "wimax.debug"); - snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), - "WiMAX"); - result = genl_register_family_with_ops_groups(&wimax_gnl_family, - wimax_gnl_ops, - wimax_gnl_mcgrps); + result = genl_register_family(&wimax_gnl_family); if (unlikely(result < 0)) { pr_err("cannot register generic netlink family: %d\n", result); goto error_register_family; diff --git a/net/wireless/core.c b/net/wireless/core.c index 8201e6d7449e..158c59ecf90a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; - if (!wdev->p2p_started) + if (!wdev_running(wdev)) return; rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; + wdev->is_running = false; rdev->opencount--; @@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) return; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return; rdev_stop_nan(rdev, wdev); - wdev->nan_started = false; + wdev->is_running = false; rdev->opencount--; } @@ -562,6 +562,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* + * This isn't well-defined right now. If you have an + * IBSS interface, then its beacon interval may change + * by joining other networks, and nothing prevents it + * from doing that. + * So technically we probably shouldn't even allow AP + * and IBSS in the same interface, but it seems that + * some drivers support that, possibly only with fixed + * beacon intervals for IBSS. + */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) && + c->beacon_int_min_gcd)) { + return -EINVAL; + } + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -571,6 +586,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) return -EINVAL; } +#ifndef CONFIG_WIRELESS_WDS + if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS))) + return -EINVAL; +#endif + /* You can't even choose that many! */ if (WARN_ON(cnt < c->max_interfaces)) return -EINVAL; @@ -609,6 +629,11 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) return -EINVAL; +#ifndef CONFIG_WIRELESS_WDS + if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) + return -EINVAL; +#endif + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..fb2fcd5581fe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -345,7 +345,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); + const u8 *auth_data, int auth_data_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -475,7 +475,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int); + enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index fa2066b56f36..2d8518a37eab 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -183,6 +183,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); wdev->mesh_id_len = setup->mesh_id_len; wdev->chandef = setup->chandef; + wdev->beacon_interval = setup->beacon_interval; } return err; @@ -258,6 +259,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, err = rdev_leave_mesh(rdev, dev); if (!err) { wdev->mesh_id_len = 0; + wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); rdev_set_qos_map(rdev, dev, NULL); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index cbb48e26a871..bd1f7a159d6a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -204,14 +204,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) + const u8 *auth_data, int auth_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req = { .ie = ie, .ie_len = ie_len, - .sae_data = sae_data, - .sae_data_len = sae_data_len, + .auth_data = auth_data, + .auth_data_len = auth_data_len, .auth_type = auth_type, .key = key, .key_len = key_len, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c510810f0b7c..24ab199ef2fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct cfg80211_crypto_settings *settings, int cipher_limit); -static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); -static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = NL80211_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL80211_ATTR_MAX, - .netnsok = true, - .pre_doit = nl80211_pre_doit, - .post_doit = nl80211_post_doit, -}; +static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { @@ -357,7 +343,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, - [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, + [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, @@ -414,6 +400,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, + .len = FILS_MAX_KEK_LEN }, + [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, + [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, }; /* policy for the key attributes */ @@ -435,6 +425,7 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; +#ifdef CONFIG_PM /* policy for WoWLAN attributes */ static const struct nla_policy nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { @@ -468,6 +459,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; +#endif /* CONFIG_PM */ /* policy for coalesce rule attributes */ static const struct nla_policy @@ -551,13 +543,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + genl_family_attrbuf(&nl80211_fam), + nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs( + sock_net(skb->sk), + genl_family_attrbuf(&nl80211_fam)); if (IS_ERR(*wdev)) { err = PTR_ERR(*wdev); goto out_unlock; @@ -1075,6 +1068,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, c->radar_detect_regions))) goto nla_put_failure; + if (c->beacon_int_min_gcd && + nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, + c->beacon_int_min_gcd)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -1322,6 +1319,95 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, return 0; } +#define CMD(op, n) \ + do { \ + if (rdev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + +static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + int i = 0; + + /* + * do *NOT* add anything into this function, new things need to be + * advertised only to new versions of userspace that can deal with + * the split (and they can't possibly care about new features... + */ + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(start_ap, START_AP); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(update_mesh_config, SET_MESH_CONFIG); + CMD(change_bss, SET_BSS); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); + if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) + goto nla_put_failure; + } + if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || + rdev->ops->join_mesh) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } + CMD(set_wds_peer, SET_WDS_PEER); + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + goto nla_put_failure; + } + CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif + + if (rdev->ops->connect || rdev->ops->auth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) + goto nla_put_failure; + } + + if (rdev->ops->disconnect || rdev->ops->deauth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + goto nla_put_failure; + } + + return i; + nla_put_failure: + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -1549,68 +1635,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (!nl_cmds) goto nla_put_failure; - i = 0; -#define CMD(op, n) \ - do { \ - if (rdev->ops->op) { \ - i++; \ - if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ - goto nla_put_failure; \ - } \ - } while (0) - - CMD(add_virtual_intf, NEW_INTERFACE); - CMD(change_virtual_intf, SET_INTERFACE); - CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); - CMD(add_station, NEW_STATION); - CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_config, SET_MESH_CONFIG); - CMD(change_bss, SET_BSS); - CMD(auth, AUTHENTICATE); - CMD(assoc, ASSOCIATE); - CMD(deauth, DEAUTHENTICATE); - CMD(disassoc, DISASSOCIATE); - CMD(join_ibss, JOIN_IBSS); - CMD(join_mesh, JOIN_MESH); - CMD(set_pmksa, SET_PMKSA); - CMD(del_pmksa, DEL_PMKSA); - CMD(flush_pmksa, FLUSH_PMKSA); - if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) - CMD(remain_on_channel, REMAIN_ON_CHANNEL); - CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(mgmt_tx, FRAME); - CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) - goto nla_put_failure; - } - if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || - rdev->ops->join_mesh) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) - goto nla_put_failure; - } - CMD(set_wds_peer, SET_WDS_PEER); - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { - CMD(tdls_mgmt, TDLS_MGMT); - CMD(tdls_oper, TDLS_OPER); - } - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - CMD(sched_scan_start, START_SCHED_SCAN); - CMD(probe_client, PROBE_CLIENT); - CMD(set_noack_map, SET_NOACK_MAP); - if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) - goto nla_put_failure; - } - CMD(start_p2p_device, START_P2P_DEVICE); - CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif + i = nl80211_add_commands_unsplit(rdev, msg); + if (i < 0) + goto nla_put_failure; if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); @@ -1620,22 +1647,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); + CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); + CMD(update_connect_params, UPDATE_CONNECT_PARAMS); } - /* add into the if now */ #undef CMD - if (rdev->ops->connect || rdev->ops->auth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) - goto nla_put_failure; - } - - if (rdev->ops->disconnect || rdev->ops->deauth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) - goto nla_put_failure; - } - nla_nest_end(msg, nl_cmds); state->split_start++; if (state->split) @@ -1881,7 +1897,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { - struct nlattr **tb = nl80211_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy); /* ignore parse errors for backward compatibility */ @@ -2296,10 +2312,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { - result = nla_parse(tb, NL80211_TXQ_ATTR_MAX, - nla_data(nl_txq_params), - nla_len(nl_txq_params), - txq_params_policy); + result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, + nl_txq_params, + txq_params_policy); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -3549,8 +3564,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); + err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, + nl80211_txattr_policy); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -3756,12 +3771,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && auth_type == NL80211_AUTHTYPE_SAE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_FILS_STA) && + (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK)) + return false; return true; case NL80211_CMD_CONNECT: case NL80211_CMD_START_AP: /* SAE not supported yet */ if (auth_type == NL80211_AUTHTYPE_SAE) return false; + /* FILS not supported yet */ + if (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) + return false; return true; default: return false; @@ -3803,7 +3829,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); - err = cfg80211_validate_beacon_int(rdev, params.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, + params.beacon_interval); if (err) return err; @@ -6305,9 +6332,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { - r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, + nl_reg_rule, reg_rule_policy); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -6374,8 +6400,8 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, if (!nla_ok(nest, nla_len(nest))) return -EINVAL; - err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest), - nla_len(nest), nl80211_bss_select_policy); + err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, + nl80211_bss_select_policy); if (err) return err; @@ -6765,9 +6791,8 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, if (WARN_ON(i >= n_plans)) return -EINVAL; - err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX, - nla_data(attr), nla_len(attr), - nl80211_plan_policy); + err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, + attr, nl80211_plan_policy); if (err) return err; @@ -6856,9 +6881,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *rssi; - err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - nla_data(attr), nla_len(attr), - nl80211_match_policy); + err = nla_parse_nested(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, nl80211_match_policy); if (err) return ERR_PTR(err); /* add other standalone attributes here */ @@ -7029,9 +7054,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *ssid, *rssi; - err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - nla_data(attr), nla_len(attr), - nl80211_match_policy); + err = nla_parse_nested(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, nl80211_match_policy); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; @@ -7643,6 +7668,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; @@ -7655,7 +7681,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) return res; /* prepare_wdev_dump parsed the attributes */ - radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; if (!wdev->netdev) { res = -EINVAL; @@ -7708,8 +7734,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL; - int err, ssid_len, ie_len = 0, sae_data_len = 0; + const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL; + int err, ssid_len, ie_len = 0, auth_data_len = 0; enum nl80211_auth_type auth_type; struct key_parse key; bool local_state_change; @@ -7789,17 +7815,23 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE)) return -EINVAL; - if (auth_type == NL80211_AUTHTYPE_SAE && - !info->attrs[NL80211_ATTR_SAE_DATA]) + if ((auth_type == NL80211_AUTHTYPE_SAE || + auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) && + !info->attrs[NL80211_ATTR_AUTH_DATA]) return -EINVAL; - if (info->attrs[NL80211_ATTR_SAE_DATA]) { - if (auth_type != NL80211_AUTHTYPE_SAE) + if (info->attrs[NL80211_ATTR_AUTH_DATA]) { + if (auth_type != NL80211_AUTHTYPE_SAE && + auth_type != NL80211_AUTHTYPE_FILS_SK && + auth_type != NL80211_AUTHTYPE_FILS_SK_PFS && + auth_type != NL80211_AUTHTYPE_FILS_PK) return -EINVAL; - sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]); - sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]); + auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); + auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); /* need to include at least Auth Transaction and Status Code */ - if (sae_data_len < 4) + if (auth_data_len < 4) return -EINVAL; } @@ -7816,7 +7848,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + auth_data, auth_data_len); wdev_unlock(dev->ieee80211_ptr); return err; } @@ -7995,6 +8027,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.flags |= ASSOC_REQ_USE_RRM; } + if (info->attrs[NL80211_ATTR_FILS_KEK]) { + req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); + req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); + if (!info->attrs[NL80211_ATTR_FILS_NONCES]) + return -EINVAL; + req.fils_nonces = + nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); + } + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { wdev_lock(dev->ieee80211_ptr); @@ -8152,7 +8193,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC, + ibss.beacon_interval); if (err) return err; @@ -8478,14 +8520,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb, */ phy_idx = cb->args[0] - 1; } else { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_err; - rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_err; @@ -8493,9 +8535,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, phy_idx = rdev->wiphy_idx; rdev = NULL; - if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) - cb->args[1] = - (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]; + if (attrbuf[NL80211_ATTR_TESTDATA]) + cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA]; } if (cb->args[1]) { @@ -8726,6 +8767,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_update_connect_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_connect_params connect = {}; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + u32 changed = 0; + int ret; + + if (!rdev->ops->update_connect_params) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_IE]) { + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + changed |= UPDATE_ASSOC_IES; + } + + wdev_lock(dev->ieee80211_ptr); + if (!wdev->current_bss) + ret = -ENOLINK; + else + ret = rdev_update_connect_params(rdev, dev, &connect, changed); + wdev_unlock(dev->ieee80211_ptr); + + return ret; +} + static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -9417,7 +9489,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, + NL80211_IFTYPE_MESH_POINT, + setup.beacon_interval); if (err) return err; } @@ -9728,9 +9802,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!rdev->wiphy.wowlan->tcp) return -EINVAL; - err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, - nla_data(attr), nla_len(attr), - nl80211_wowlan_tcp_policy); + err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, + nl80211_wowlan_tcp_policy); if (err) return err; @@ -9875,9 +9948,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse(tb, NL80211_ATTR_MAX, - nla_data(attr), nla_len(attr), - nl80211_policy); + err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy); if (err) goto out; @@ -9911,10 +9982,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto set_wakeup; } - err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG, - nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), - nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), - nl80211_wowlan_policy); + err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, + info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], + nl80211_wowlan_policy); if (err) return err; @@ -9996,8 +10066,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) rem) { u8 *mask_pat; - nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + NULL); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10207,8 +10277,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; - err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule), - nla_len(rule), nl80211_coalesce_policy); + err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, + nl80211_coalesce_policy); if (err) return err; @@ -10246,8 +10316,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), - nla_len(pat), NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10366,10 +10435,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_REKEY_DATA]) return -EINVAL; - err = nla_parse(tb, MAX_NL80211_REKEY_DATA, - nla_data(info->attrs[NL80211_ATTR_REKEY_DATA]), - nla_len(info->attrs[NL80211_ATTR_REKEY_DATA]), - nl80211_rekey_policy); + err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, + info->attrs[NL80211_ATTR_REKEY_DATA], + nl80211_rekey_policy); if (err) return err; @@ -10518,7 +10586,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) return -EOPNOTSUPP; - if (wdev->p2p_started) + if (wdev_running(wdev)) return 0; if (rfkill_blocked(rdev->rfkill)) @@ -10528,7 +10596,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->p2p_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10560,7 +10628,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (wdev->nan_started) + if (!wdev_running(wdev)) return -EEXIST; if (rfkill_blocked(rdev->rfkill)) @@ -10583,7 +10651,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->nan_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10638,8 +10706,7 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { - filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), - GFP_KERNEL); + filter[i].filter = nla_memdup(attr, GFP_KERNEL); filter[i].len = nla_len(attr); i++; } @@ -10668,7 +10735,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_NAN_FUNC]) @@ -10678,10 +10745,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb, wdev->owner_nlportid != info->snd_portid) return -ENOTCONN; - err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, - nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), - nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), - nl80211_nan_func_policy); + err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, + info->attrs[NL80211_ATTR_NAN_FUNC], + nl80211_nan_func_policy); if (err) return err; @@ -10776,9 +10842,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (tb[NL80211_NAN_FUNC_SRF]) { struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; - err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, - nla_data(tb[NL80211_NAN_FUNC_SRF]), - nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + tb[NL80211_NAN_FUNC_SRF], + nl80211_nan_srf_policy); if (err) goto out; @@ -10904,7 +10970,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -10932,7 +10998,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { @@ -11244,10 +11310,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) + if (!wdev_running(wdev)) return -ENETDOWN; } @@ -11277,6 +11340,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; @@ -11312,31 +11376,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, } err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || - !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + if (!attrbuf[NL80211_ATTR_VENDOR_ID] || + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { err = -EINVAL; goto out_unlock; } - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; - *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*rdev)) { err = PTR_ERR(*rdev); goto out_unlock; } - vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); - subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); + vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { const struct wiphy_vendor_command *vcmd; @@ -11360,9 +11421,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, goto out_unlock; } - if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { - data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); - data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); + if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); + data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]); } /* 0 is the first index - add 1 to parse only once */ @@ -11410,10 +11471,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) + if (!wdev_running(wdev)) return -ENETDOWN; } } @@ -11726,6 +11784,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, return 0; } +static int nl80211_set_multicast_to_unicast(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const struct nlattr *nla; + bool enabled; + + if (netif_running(dev)) + return -EBUSY; + + if (!rdev->ops->set_multicast_to_unicast) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED]; + enabled = nla_get_flag(nla); + + return rdev_set_multicast_to_unicast(rdev, dev, enabled); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -11784,29 +11867,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[1] = wdev; } - if (dev) { - if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && - !netif_running(dev)) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } + if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && + !wdev_running(wdev)) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (dev) dev_hold(dev); - } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && - !wdev->p2p_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - if (wdev->iftype == NL80211_IFTYPE_NAN && - !wdev->nan_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - } info->user_ptr[0] = rdev; } @@ -12179,6 +12248,14 @@ static const struct genl_ops nl80211_ops[] = { NL80211_FLAG_NEED_RTNL, }, { + .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, + .doit = nl80211_update_connect_params, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { .cmd = NL80211_CMD_DISCONNECT, .doit = nl80211_disconnect, .policy = nl80211_policy, @@ -12599,6 +12676,29 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, + .doit = nl80211_set_multicast_to_unicast, + .policy = nl80211_policy, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, +}; + +static struct genl_family nl80211_fam __ro_after_init = { + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, + .netnsok = true, + .pre_doit = nl80211_pre_doit, + .post_doit = nl80211_post_doit, + .module = THIS_MODULE, + .ops = nl80211_ops, + .n_ops = ARRAY_SIZE(nl80211_ops), + .mcgrps = nl80211_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), }; /* notification functions */ @@ -14563,12 +14663,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) /* initialisation/exit functions */ -int nl80211_init(void) +int __init nl80211_init(void) { int err; - err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops, - nl80211_mcgrps); + err = genl_register_family(&nl80211_fam); if (err) return err; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 11cf83c8ad4f..2f425075ada8 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_update_connect_params(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *sme, u32 changed) +{ + int ret; + trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed); + ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason_code) { @@ -562,6 +574,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const bool enabled) +{ + int ret; + trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev) { trace_rdev_rfkill_poll(&rdev->wiphy); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a77db333927e..2b5bb380414b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1088,7 +1088,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - else if (wdev->current_bss) + else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); return err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a3d0a91b1e09..ea1b47e04fa4 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect, __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) ); +TRACE_EVENT(rdev_update_connect_params, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_connect_params *sme, u32 changed), + TP_ARGS(wiphy, netdev, sme, changed), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, changed) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->changed = changed; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed) +); + TRACE_EVENT(rdev_set_cqm_rssi_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 rssi_thold, @@ -3030,6 +3048,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); + +TRACE_EVENT(rdev_set_multicast_to_unicast, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const bool enabled), + TP_ARGS(wiphy, netdev, enabled), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(bool, enabled) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->enabled = enabled; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, + BOOL_TO_STR(__entry->enabled)) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 5ea12afc7706..88725f8eefad 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -13,6 +13,7 @@ #include <net/dsfield.h> #include <linux/if_vlan.h> #include <linux/mpls.h> +#include <linux/gcd.h> #include "core.h" #include "rdev-ops.h" @@ -1377,6 +1378,25 @@ static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) return false; } +static size_t skip_ie(const u8 *ies, size_t ielen, size_t pos) +{ + /* we assume a validly formed IEs buffer */ + u8 len = ies[pos + 1]; + + pos += 2 + len; + + /* the IE itself must have 255 bytes for fragments to follow */ + if (len < 255) + return pos; + + while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) { + len = ies[pos + 1]; + pos += 2 + len; + } + + return pos; +} + size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, const u8 *after_ric, int n_after_ric, @@ -1386,14 +1406,14 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); while (pos < ielen && !ieee80211_id_in_list(after_ric, n_after_ric, ies[pos])) - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); } else { - pos += 2 + ies[pos + 1]; + pos = skip_ie(ies, ielen, pos); } } @@ -1554,31 +1574,57 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); -int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int) +static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, + u32 *beacon_int_gcd, + bool *beacon_int_different) { struct wireless_dev *wdev; - int res = 0; - if (beacon_int < 10 || beacon_int > 10000) - return -EINVAL; + *beacon_int_gcd = 0; + *beacon_int_different = false; - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + list_for_each_entry(wdev, &wiphy->wdev_list, list) { if (!wdev->beacon_interval) continue; - if (wdev->beacon_interval != beacon_int) { - res = -EINVAL; - break; + + if (!*beacon_int_gcd) { + *beacon_int_gcd = wdev->beacon_interval; + continue; } + + if (wdev->beacon_interval == *beacon_int_gcd) + continue; + + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval); } - return res; + if (new_beacon_int && *beacon_int_gcd != new_beacon_int) { + if (*beacon_int_gcd) + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int); + } +} + +int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, u32 beacon_int) +{ + /* + * This is just a basic pre-condition check; if interface combinations + * are possible the driver must already be checking those with a call + * to cfg80211_check_combinations(), in which case we'll validate more + * through the cfg80211_calculate_bi_data() call and code in + * cfg80211_iter_combinations(). + */ + + if (beacon_int < 10 || beacon_int > 10000) + return -EINVAL; + + return 0; } int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data) @@ -1588,8 +1634,23 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int i, j, iftype; int num_interfaces = 0; u32 used_iftypes = 0; + u32 beacon_int_gcd; + bool beacon_int_different; + + /* + * This is a bit strange, since the iteration used to rely only on + * the data given by the driver, but here it now relies on context, + * in form of the currently operating interfaces. + * This is OK for all current users, and saves us from having to + * push the GCD calculations into all the drivers. + * In the future, this should probably rely more on data that's in + * cfg80211 already - the only thing not would appear to be any new + * interfaces (while being brought up) and channel/radar data. + */ + cfg80211_calculate_bi_data(wiphy, params->new_beacon_int, + &beacon_int_gcd, &beacon_int_different); - if (radar_detect) { + if (params->radar_detect) { rcu_read_lock(); regdom = rcu_dereference(cfg80211_regdomain); if (regdom) @@ -1598,8 +1659,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, } for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - num_interfaces += iftype_num[iftype]; - if (iftype_num[iftype] > 0 && + num_interfaces += params->iftype_num[iftype]; + if (params->iftype_num[iftype] > 0 && !(wiphy->software_iftypes & BIT(iftype))) used_iftypes |= BIT(iftype); } @@ -1613,7 +1674,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if (num_interfaces > c->max_interfaces) continue; - if (num_different_channels > c->num_different_channels) + if (params->num_different_channels > c->num_different_channels) continue; limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, @@ -1628,16 +1689,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, all_iftypes |= limits[j].types; if (!(limits[j].types & BIT(iftype))) continue; - if (limits[j].max < iftype_num[iftype]) + if (limits[j].max < params->iftype_num[iftype]) goto cont; - limits[j].max -= iftype_num[iftype]; + limits[j].max -= params->iftype_num[iftype]; } } - if (radar_detect != (c->radar_detect_widths & radar_detect)) + if (params->radar_detect != + (c->radar_detect_widths & params->radar_detect)) goto cont; - if (radar_detect && c->radar_detect_regions && + if (params->radar_detect && c->radar_detect_regions && !(c->radar_detect_regions & BIT(region))) goto cont; @@ -1649,6 +1711,14 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if ((all_iftypes & used_iftypes) != used_iftypes) goto cont; + if (beacon_int_gcd) { + if (c->beacon_int_min_gcd && + beacon_int_gcd < c->beacon_int_min_gcd) + goto cont; + if (!c->beacon_int_min_gcd && beacon_int_different) + goto cont; + } + /* This combination covered all interface types and * supported the requested numbers, so we're good. */ @@ -1671,14 +1741,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, } int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]) + struct iface_combination_params *params) { int err, num = 0; - err = cfg80211_iter_combinations(wiphy, num_different_channels, - radar_detect, iftype_num, + err = cfg80211_iter_combinations(wiphy, params, cfg80211_iter_sum_ifcombs, &num); if (err) return err; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 419bf5d463bd..45cb7c699b65 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -388,14 +388,6 @@ static void xfrm_state_gc_task(struct work_struct *work) xfrm_state_gc_destroy(x); } -static inline unsigned long make_jiffies(long secs) -{ - if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) - return MAX_SCHEDULE_TIMEOUT-1; - else - return secs*HZ; -} - static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 72c58675973e..ac87f9c068ae 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -2,7 +2,6 @@ obj- := dummy.o # List of programs to build -hostprogs-y := test_verifier test_maps hostprogs-y += sock_example hostprogs-y += fds_example hostprogs-y += sockex1 @@ -29,8 +28,6 @@ hostprogs-y += trace_event hostprogs-y += sampleip hostprogs-y += tc_l2_redirect -test_verifier-objs := test_verifier.o libbpf.o -test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o fds_example-objs := bpf_load.o libbpf.o fds_example.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c deleted file mode 100644 index cce2b59751eb..000000000000 --- a/samples/bpf/test_maps.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Testsuite for eBPF maps - * - * Copyright (c) 2014 PLUMgrid, http://plumgrid.com - * Copyright (c) 2016 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include <stdio.h> -#include <unistd.h> -#include <linux/bpf.h> -#include <errno.h> -#include <string.h> -#include <assert.h> -#include <sys/wait.h> -#include <stdlib.h> -#include "libbpf.h" - -static int map_flags; - -/* sanity tests for map API */ -static void test_hashmap_sanity(int i, void *data) -{ - long long key, next_key, value; - int map_fd; - - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - 2, map_flags); - if (map_fd < 0) { - printf("failed to create hashmap '%s'\n", strerror(errno)); - exit(1); - } - - key = 1; - value = 1234; - /* insert key=1 element */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); - - value = 0; - /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && - /* key=1 already exists */ - errno == EEXIST); - - assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL); - - /* check that key=1 can be found */ - assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); - - key = 2; - /* check that key=2 is not found */ - assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); - - /* BPF_EXIST means: update existing element */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); - - /* insert key=2 element */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); - - /* key=1 and key=2 were inserted, check that key=0 cannot be inserted - * due to max_entries limit - */ - key = 0; - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && - errno == E2BIG); - - /* update existing element, thought the map is full */ - key = 1; - assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); - key = 2; - assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); - key = 1; - assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); - - /* check that key = 0 doesn't exist */ - key = 0; - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); - - /* iterate over two elements */ - assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && - errno == ENOENT); - - /* delete both elements */ - key = 1; - assert(bpf_delete_elem(map_fd, &key) == 0); - key = 2; - assert(bpf_delete_elem(map_fd, &key) == 0); - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); - - key = 0; - /* check that map is empty */ - assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 && - errno == ENOENT); - close(map_fd); -} - -/* sanity tests for percpu map API */ -static void test_percpu_hashmap_sanity(int task, void *data) -{ - long long key, next_key; - int expected_key_mask = 0; - unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - long long value[nr_cpus]; - int map_fd, i; - - map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), - sizeof(value[0]), 2, map_flags); - if (map_fd < 0) { - printf("failed to create hashmap '%s'\n", strerror(errno)); - exit(1); - } - - for (i = 0; i < nr_cpus; i++) - value[i] = i + 100; - key = 1; - /* insert key=1 element */ - assert(!(expected_key_mask & key)); - assert(bpf_update_elem(map_fd, &key, value, BPF_ANY) == 0); - expected_key_mask |= key; - - /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 && - /* key=1 already exists */ - errno == EEXIST); - - /* -1 is an invalid flag */ - assert(bpf_update_elem(map_fd, &key, value, -1) == -1 && - errno == EINVAL); - - /* check that key=1 can be found. value could be 0 if the lookup - * was run from a different cpu. - */ - value[0] = 1; - assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100); - - key = 2; - /* check that key=2 is not found */ - assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT); - - /* BPF_EXIST means: update existing element */ - assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); - - /* insert key=2 element */ - assert(!(expected_key_mask & key)); - assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); - expected_key_mask |= key; - - /* key=1 and key=2 were inserted, check that key=0 cannot be inserted - * due to max_entries limit - */ - key = 0; - assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == -1 && - errno == E2BIG); - - /* check that key = 0 doesn't exist */ - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); - - /* iterate over two elements */ - while (!bpf_get_next_key(map_fd, &key, &next_key)) { - assert((expected_key_mask & next_key) == next_key); - expected_key_mask &= ~next_key; - - assert(bpf_lookup_elem(map_fd, &next_key, value) == 0); - for (i = 0; i < nr_cpus; i++) - assert(value[i] == i + 100); - - key = next_key; - } - assert(errno == ENOENT); - - /* Update with BPF_EXIST */ - key = 1; - assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == 0); - - /* delete both elements */ - key = 1; - assert(bpf_delete_elem(map_fd, &key) == 0); - key = 2; - assert(bpf_delete_elem(map_fd, &key) == 0); - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); - - key = 0; - /* check that map is empty */ - assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 && - errno == ENOENT); - close(map_fd); -} - -static void test_arraymap_sanity(int i, void *data) -{ - int key, next_key, map_fd; - long long value; - - map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), - 2, 0); - if (map_fd < 0) { - printf("failed to create arraymap '%s'\n", strerror(errno)); - exit(1); - } - - key = 1; - value = 1234; - /* insert key=1 element */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); - - value = 0; - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && - errno == EEXIST); - - /* check that key=1 can be found */ - assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); - - key = 0; - /* check that key=0 is also found and zero initialized */ - assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); - - - /* key=0 and key=1 were inserted, check that key=2 cannot be inserted - * due to max_entries limit - */ - key = 2; - assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && - errno == E2BIG); - - /* check that key = 2 doesn't exist */ - assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); - - /* iterate over two elements */ - assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && - next_key == 0); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && - next_key == 1); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && - errno == ENOENT); - - /* delete shouldn't succeed */ - key = 1; - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL); - - close(map_fd); -} - -static void test_percpu_arraymap_many_keys(void) -{ - unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - unsigned nr_keys = 20000; - long values[nr_cpus]; - int key, map_fd, i; - - map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), nr_keys, 0); - if (map_fd < 0) { - printf("failed to create per-cpu arraymap '%s'\n", - strerror(errno)); - exit(1); - } - - for (i = 0; i < nr_cpus; i++) - values[i] = i + 10; - - for (key = 0; key < nr_keys; key++) - assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0); - - for (key = 0; key < nr_keys; key++) { - for (i = 0; i < nr_cpus; i++) - values[i] = 0; - assert(bpf_lookup_elem(map_fd, &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - assert(values[i] == i + 10); - } - - close(map_fd); -} - -static void test_percpu_arraymap_sanity(int i, void *data) -{ - unsigned nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - long values[nr_cpus]; - int key, next_key, map_fd; - - map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), 2, 0); - if (map_fd < 0) { - printf("failed to create arraymap '%s'\n", strerror(errno)); - exit(1); - } - - for (i = 0; i < nr_cpus; i++) - values[i] = i + 100; - - key = 1; - /* insert key=1 element */ - assert(bpf_update_elem(map_fd, &key, values, BPF_ANY) == 0); - - values[0] = 0; - assert(bpf_update_elem(map_fd, &key, values, BPF_NOEXIST) == -1 && - errno == EEXIST); - - /* check that key=1 can be found */ - assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100); - - key = 0; - /* check that key=0 is also found and zero initialized */ - assert(bpf_lookup_elem(map_fd, &key, values) == 0 && - values[0] == 0 && values[nr_cpus - 1] == 0); - - - /* check that key=2 cannot be inserted due to max_entries limit */ - key = 2; - assert(bpf_update_elem(map_fd, &key, values, BPF_EXIST) == -1 && - errno == E2BIG); - - /* check that key = 2 doesn't exist */ - assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT); - - /* iterate over two elements */ - assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && - next_key == 0); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && - next_key == 1); - assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && - errno == ENOENT); - - /* delete shouldn't succeed */ - key = 1; - assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL); - - close(map_fd); -} - -#define MAP_SIZE (32 * 1024) -static void test_map_large(void) -{ - struct bigkey { - int a; - char b[116]; - long long c; - } key; - int map_fd, i, value; - - /* allocate 4Mbyte of memory */ - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags); - if (map_fd < 0) { - printf("failed to create large map '%s'\n", strerror(errno)); - exit(1); - } - - for (i = 0; i < MAP_SIZE; i++) { - key = (struct bigkey) {.c = i}; - value = i; - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); - } - key.c = -1; - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && - errno == E2BIG); - - /* iterate through all elements */ - for (i = 0; i < MAP_SIZE; i++) - assert(bpf_get_next_key(map_fd, &key, &key) == 0); - assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); - - key.c = 0; - assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); - key.a = 1; - assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); - - close(map_fd); -} - -/* fork N children and wait for them to complete */ -static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data) -{ - pid_t pid[tasks]; - int i; - - for (i = 0; i < tasks; i++) { - pid[i] = fork(); - if (pid[i] == 0) { - fn(i, data); - exit(0); - } else if (pid[i] == -1) { - printf("couldn't spawn #%d process\n", i); - exit(1); - } - } - for (i = 0; i < tasks; i++) { - int status; - - assert(waitpid(pid[i], &status, 0) == pid[i]); - assert(status == 0); - } -} - -static void test_map_stress(void) -{ - run_parallel(100, test_hashmap_sanity, NULL); - run_parallel(100, test_percpu_hashmap_sanity, NULL); - run_parallel(100, test_arraymap_sanity, NULL); - run_parallel(100, test_percpu_arraymap_sanity, NULL); -} - -#define TASKS 1024 -#define DO_UPDATE 1 -#define DO_DELETE 0 -static void do_work(int fn, void *data) -{ - int map_fd = ((int *)data)[0]; - int do_update = ((int *)data)[1]; - int i; - int key, value; - - for (i = fn; i < MAP_SIZE; i += TASKS) { - key = value = i; - if (do_update) { - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); - assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); - } else { - assert(bpf_delete_elem(map_fd, &key) == 0); - } - } -} - -static void test_map_parallel(void) -{ - int i, map_fd, key = 0, value = 0; - int data[2]; - - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags); - if (map_fd < 0) { - printf("failed to create map for parallel test '%s'\n", - strerror(errno)); - exit(1); - } - - data[0] = map_fd; - data[1] = DO_UPDATE; - /* use the same map_fd in children to add elements to this map - * child_0 adds key=0, key=1024, key=2048, ... - * child_1 adds key=1, key=1025, key=2049, ... - * child_1023 adds key=1023, ... - */ - run_parallel(TASKS, do_work, data); - - /* check that key=0 is already there */ - assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && - errno == EEXIST); - - /* check that all elements were inserted */ - key = -1; - for (i = 0; i < MAP_SIZE; i++) - assert(bpf_get_next_key(map_fd, &key, &key) == 0); - assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); - - /* another check for all elements */ - for (i = 0; i < MAP_SIZE; i++) { - key = MAP_SIZE - i - 1; - assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && - value == key); - } - - /* now let's delete all elemenets in parallel */ - data[1] = DO_DELETE; - run_parallel(TASKS, do_work, data); - - /* nothing should be left */ - key = -1; - assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); -} - -static void run_all_tests(void) -{ - test_hashmap_sanity(0, NULL); - test_percpu_hashmap_sanity(0, NULL); - test_arraymap_sanity(0, NULL); - test_percpu_arraymap_sanity(0, NULL); - test_percpu_arraymap_many_keys(); - - test_map_large(); - test_map_parallel(); - test_map_stress(); -} - -int main(void) -{ - map_flags = 0; - run_all_tests(); - map_flags = BPF_F_NO_PREALLOC; - run_all_tests(); - printf("test_maps: OK\n"); - return 0; -} diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 3276625595b2..122153b16ea4 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -218,6 +218,30 @@ .off = OFF, \ .imm = IMM }) +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ + +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ + +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + /* Program exit */ #define BPF_EXIT_INSN() \ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f770dba2a6f6..a3144a3de3a8 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,5 @@ -TARGETS = breakpoints +TARGETS = bpf +TARGETS += breakpoints TARGETS += capabilities TARGETS += cpu-hotplug TARGETS += efivarfs diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore new file mode 100644 index 000000000000..3c59f96e3ed8 --- /dev/null +++ b/tools/testing/selftests/bpf/.gitignore @@ -0,0 +1,2 @@ +test_verifier +test_maps diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile new file mode 100644 index 000000000000..4761e2d65ab8 --- /dev/null +++ b/tools/testing/selftests/bpf/Makefile @@ -0,0 +1,13 @@ +CFLAGS += -Wall -O2 + +test_objs = test_verifier test_maps + +TEST_PROGS := test_verifier test_maps test_kmod.sh +TEST_FILES := $(test_objs) + +all: $(test_objs) + +include ../lib.mk + +clean: + $(RM) $(test_objs) diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h new file mode 100644 index 000000000000..6b4565f2a3f2 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_sys.h @@ -0,0 +1,108 @@ +#ifndef __BPF_SYS__ +#define __BPF_SYS__ + +#include <stdint.h> +#include <stdlib.h> + +#include <sys/syscall.h> + +#include <linux/bpf.h> + +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long) ptr; +} + +static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ +#ifdef __NR_bpf + return syscall(__NR_bpf, cmd, attr, size); +#else + fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); + errno = ENOSYS; + return -1; +#endif +} + +static inline int bpf_map_lookup(int fd, const void *key, void *value) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + attr.value = bpf_ptr_to_u64(value); + + return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +static inline int bpf_map_update(int fd, const void *key, const void *value, + uint64_t flags) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + attr.value = bpf_ptr_to_u64(value); + attr.flags = flags; + + return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +static inline int bpf_map_delete(int fd, const void *key) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + + return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + +static inline int bpf_map_next_key(int fd, const void *key, void *next_key) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + attr.next_key = bpf_ptr_to_u64(next_key); + + return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +static inline int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t flags) +{ + union bpf_attr attr = {}; + + attr.map_type = type; + attr.key_size = size_key; + attr.value_size = size_value; + attr.max_entries = max_elem; + attr.map_flags = flags; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static inline int bpf_prog_load(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t size_insns, + const char *license, char *log, size_t size_log) +{ + union bpf_attr attr = {}; + + attr.prog_type = type; + attr.insns = bpf_ptr_to_u64(insns); + attr.insn_cnt = size_insns / sizeof(struct bpf_insn); + attr.license = bpf_ptr_to_u64(license); + + if (size_log > 0) { + attr.log_buf = bpf_ptr_to_u64(log); + attr.log_size = size_log; + attr.log_level = 1; + log[0] = 0; + } + + return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +#endif /* __BPF_SYS__ */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config new file mode 100644 index 000000000000..52d53ed08769 --- /dev/null +++ b/tools/testing/selftests/bpf/config @@ -0,0 +1,5 @@ +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_NET_CLS_BPF=m +CONFIG_BPF_EVENTS=y +CONFIG_TEST_BPF=m diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh new file mode 100755 index 000000000000..92e627adf354 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +SRC_TREE=../../../../ + +test_run() +{ + sysctl -w net.core.bpf_jit_enable=$1 2>&1 > /dev/null + sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null + + echo "[ JIT enabled:$1 hardened:$2 ]" + dmesg -C + insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + rmmod test_bpf 2> /dev/null + dmesg | grep FAIL +} + +test_save() +{ + JE=`sysctl -n net.core.bpf_jit_enable` + JH=`sysctl -n net.core.bpf_jit_harden` +} + +test_restore() +{ + sysctl -w net.core.bpf_jit_enable=$JE 2>&1 > /dev/null + sysctl -w net.core.bpf_jit_harden=$JH 2>&1 > /dev/null +} + +rc=0 +test_save +test_run 0 0 +test_run 1 0 +test_run 1 1 +test_run 1 2 +test_restore +exit $rc diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c new file mode 100644 index 000000000000..ee384f02cb6e --- /dev/null +++ b/tools/testing/selftests/bpf/test_maps.c @@ -0,0 +1,525 @@ +/* + * Testsuite for eBPF maps + * + * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +#include <sys/wait.h> +#include <sys/resource.h> + +#include <linux/bpf.h> + +#include "bpf_sys.h" + +static int map_flags; + +static void test_hashmap(int task, void *data) +{ + long long key, next_key, value; + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + 2, map_flags); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* Insert key=1 element. */ + assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0); + + value = 0; + /* BPF_NOEXIST means add new element if it doesn't exist. */ + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 && + /* key=1 already exists. */ + errno == EEXIST); + + /* -1 is an invalid flag. */ + assert(bpf_map_update(fd, &key, &value, -1) == -1 && errno == EINVAL); + + /* Check that key=1 can be found. */ + assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 1234); + + key = 2; + /* Check that key=2 is not found. */ + assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT); + + /* BPF_EXIST means update existing element. */ + assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == -1 && + /* key=2 is not there. */ + errno == ENOENT); + + /* Insert key=2 element. */ + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0); + + /* key=1 and key=2 were inserted, check that key=0 cannot be + * inserted due to max_entries limit. + */ + key = 0; + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* Update existing element, though the map is full. */ + key = 1; + assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == 0); + key = 2; + assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0); + key = 1; + assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0); + + /* Check that key = 0 doesn't exist. */ + key = 0; + assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT); + + /* Iterate over two elements. */ + assert(bpf_map_next_key(fd, &key, &next_key) == 0 && + (next_key == 1 || next_key == 2)); + assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 && + (next_key == 1 || next_key == 2)); + assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* Delete both elements. */ + key = 1; + assert(bpf_map_delete(fd, &key) == 0); + key = 2; + assert(bpf_map_delete(fd, &key) == 0); + assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT); + + key = 0; + /* Check that map is empty. */ + assert(bpf_map_next_key(fd, &key, &next_key) == -1 && + errno == ENOENT); + + close(fd); +} + +static void test_hashmap_percpu(int task, void *data) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + long long value[nr_cpus]; + long long key, next_key; + int expected_key_mask = 0; + int fd, i; + + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), + sizeof(value[0]), 2, map_flags); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + value[i] = i + 100; + + key = 1; + /* Insert key=1 element. */ + assert(!(expected_key_mask & key)); + assert(bpf_map_update(fd, &key, value, BPF_ANY) == 0); + expected_key_mask |= key; + + /* BPF_NOEXIST means add new element if it doesn't exist. */ + assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == -1 && + /* key=1 already exists. */ + errno == EEXIST); + + /* -1 is an invalid flag. */ + assert(bpf_map_update(fd, &key, value, -1) == -1 && errno == EINVAL); + + /* Check that key=1 can be found. Value could be 0 if the lookup + * was run from a different CPU. + */ + value[0] = 1; + assert(bpf_map_lookup(fd, &key, value) == 0 && value[0] == 100); + + key = 2; + /* Check that key=2 is not found. */ + assert(bpf_map_lookup(fd, &key, value) == -1 && errno == ENOENT); + + /* BPF_EXIST means update existing element. */ + assert(bpf_map_update(fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there. */ + errno == ENOENT); + + /* Insert key=2 element. */ + assert(!(expected_key_mask & key)); + assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == 0); + expected_key_mask |= key; + + /* key=1 and key=2 were inserted, check that key=0 cannot be + * inserted due to max_entries limit. + */ + key = 0; + assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* Check that key = 0 doesn't exist. */ + assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT); + + /* Iterate over two elements. */ + while (!bpf_map_next_key(fd, &key, &next_key)) { + assert((expected_key_mask & next_key) == next_key); + expected_key_mask &= ~next_key; + + assert(bpf_map_lookup(fd, &next_key, value) == 0); + + for (i = 0; i < nr_cpus; i++) + assert(value[i] == i + 100); + + key = next_key; + } + assert(errno == ENOENT); + + /* Update with BPF_EXIST. */ + key = 1; + assert(bpf_map_update(fd, &key, value, BPF_EXIST) == 0); + + /* Delete both elements. */ + key = 1; + assert(bpf_map_delete(fd, &key) == 0); + key = 2; + assert(bpf_map_delete(fd, &key) == 0); + assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT); + + key = 0; + /* Check that map is empty. */ + assert(bpf_map_next_key(fd, &key, &next_key) == -1 && + errno == ENOENT); + + close(fd); +} + +static void test_arraymap(int task, void *data) +{ + int key, next_key, fd; + long long value; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* Insert key=1 element. */ + assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0); + + value = 0; + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* Check that key=1 can be found. */ + assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 1234); + + key = 0; + /* Check that key=0 is also found and zero initialized. */ + assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 0); + + /* key=0 and key=1 were inserted, check that key=2 cannot be inserted + * due to max_entries limit. + */ + key = 2; + assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == -1 && + errno == E2BIG); + + /* Check that key = 2 doesn't exist. */ + assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT); + + /* Iterate over two elements. */ + assert(bpf_map_next_key(fd, &key, &next_key) == 0 && + next_key == 0); + assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* Delete shouldn't succeed. */ + key = 1; + assert(bpf_map_delete(fd, &key) == -1 && errno == EINVAL); + + close(fd); +} + +static void test_arraymap_percpu(int task, void *data) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int key, next_key, fd, i; + long values[nr_cpus]; + + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(values[0]), 2, 0); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + values[i] = i + 100; + + key = 1; + /* Insert key=1 element. */ + assert(bpf_map_update(fd, &key, values, BPF_ANY) == 0); + + values[0] = 0; + assert(bpf_map_update(fd, &key, values, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* Check that key=1 can be found. */ + assert(bpf_map_lookup(fd, &key, values) == 0 && values[0] == 100); + + key = 0; + /* Check that key=0 is also found and zero initialized. */ + assert(bpf_map_lookup(fd, &key, values) == 0 && + values[0] == 0 && values[nr_cpus - 1] == 0); + + /* Check that key=2 cannot be inserted due to max_entries limit. */ + key = 2; + assert(bpf_map_update(fd, &key, values, BPF_EXIST) == -1 && + errno == E2BIG); + + /* Check that key = 2 doesn't exist. */ + assert(bpf_map_lookup(fd, &key, values) == -1 && errno == ENOENT); + + /* Iterate over two elements. */ + assert(bpf_map_next_key(fd, &key, &next_key) == 0 && + next_key == 0); + assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* Delete shouldn't succeed. */ + key = 1; + assert(bpf_map_delete(fd, &key) == -1 && errno == EINVAL); + + close(fd); +} + +static void test_arraymap_percpu_many_keys(void) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + unsigned int nr_keys = 20000; + long values[nr_cpus]; + int key, fd, i; + + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), + sizeof(values[0]), nr_keys, 0); + if (fd < 0) { + printf("Failed to create per-cpu arraymap '%s'!\n", + strerror(errno)); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) + values[i] = i + 10; + + for (key = 0; key < nr_keys; key++) + assert(bpf_map_update(fd, &key, values, BPF_ANY) == 0); + + for (key = 0; key < nr_keys; key++) { + for (i = 0; i < nr_cpus; i++) + values[i] = 0; + + assert(bpf_map_lookup(fd, &key, values) == 0); + + for (i = 0; i < nr_cpus; i++) + assert(values[i] == i + 10); + } + + close(fd); +} + +#define MAP_SIZE (32 * 1024) + +static void test_map_large(void) +{ + struct bigkey { + int a; + char b[116]; + long long c; + } key; + int fd, i, value; + + fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags); + if (fd < 0) { + printf("Failed to create large map '%s'!\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < MAP_SIZE; i++) { + key = (struct bigkey) { .c = i }; + value = i; + + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0); + } + + key.c = -1; + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* Iterate through all elements. */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_next_key(fd, &key, &key) == 0); + assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT); + + key.c = 0; + assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 0); + key.a = 1; + assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT); + + close(fd); +} + +static void run_parallel(int tasks, void (*fn)(int task, void *data), + void *data) +{ + pid_t pid[tasks]; + int i; + + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + fn(i, data); + exit(0); + } else if (pid[i] == -1) { + printf("Couldn't spawn #%d process!\n", i); + exit(1); + } + } + + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } +} + +static void test_map_stress(void) +{ + run_parallel(100, test_hashmap, NULL); + run_parallel(100, test_hashmap_percpu, NULL); + + run_parallel(100, test_arraymap, NULL); + run_parallel(100, test_arraymap_percpu, NULL); +} + +#define TASKS 1024 + +#define DO_UPDATE 1 +#define DO_DELETE 0 + +static void do_work(int fn, void *data) +{ + int do_update = ((int *)data)[1]; + int fd = ((int *)data)[0]; + int i, key, value; + + for (i = fn; i < MAP_SIZE; i += TASKS) { + key = value = i; + + if (do_update) { + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0); + assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == 0); + } else { + assert(bpf_map_delete(fd, &key) == 0); + } + } +} + +static void test_map_parallel(void) +{ + int i, fd, key = 0, value = 0; + int data[2]; + + fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE, map_flags); + if (fd < 0) { + printf("Failed to create map for parallel test '%s'!\n", + strerror(errno)); + exit(1); + } + + /* Use the same fd in children to add elements to this map: + * child_0 adds key=0, key=1024, key=2048, ... + * child_1 adds key=1, key=1025, key=2049, ... + * child_1023 adds key=1023, ... + */ + data[0] = fd; + data[1] = DO_UPDATE; + run_parallel(TASKS, do_work, data); + + /* Check that key=0 is already there. */ + assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* Check that all elements were inserted. */ + key = -1; + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_next_key(fd, &key, &key) == 0); + assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT); + + /* Another check for all elements */ + for (i = 0; i < MAP_SIZE; i++) { + key = MAP_SIZE - i - 1; + + assert(bpf_map_lookup(fd, &key, &value) == 0 && + value == key); + } + + /* Now let's delete all elemenets in parallel. */ + data[1] = DO_DELETE; + run_parallel(TASKS, do_work, data); + + /* Nothing should be left. */ + key = -1; + assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT); +} + +static void run_all_tests(void) +{ + test_hashmap(0, NULL); + test_hashmap_percpu(0, NULL); + + test_arraymap(0, NULL); + test_arraymap_percpu(0, NULL); + + test_arraymap_percpu_many_keys(); + + test_map_large(); + test_map_parallel(); + test_map_stress(); +} + +int main(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); + + map_flags = 0; + run_all_tests(); + + map_flags = BPF_F_NO_PREALLOC; + run_all_tests(); + + printf("test_maps: OK\n"); + return 0; +} diff --git a/samples/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 369ffaad3799..0ef8eaf6cea7 100644 --- a/samples/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7,29 +7,39 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ + #include <stdio.h> #include <unistd.h> -#include <linux/bpf.h> #include <errno.h> -#include <linux/unistd.h> #include <string.h> -#include <linux/filter.h> #include <stddef.h> #include <stdbool.h> +#include <sched.h> + #include <sys/resource.h> -#include "libbpf.h" -#define MAX_INSNS 512 -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#include <linux/unistd.h> +#include <linux/filter.h> +#include <linux/bpf_perf_event.h> +#include <linux/bpf.h> + +#include "../../../include/linux/filter.h" + +#include "bpf_sys.h" + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif -#define MAX_FIXUPS 8 +#define MAX_INSNS 512 +#define MAX_FIXUPS 8 struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; - int fixup[MAX_FIXUPS]; - int prog_array_fixup[MAX_FIXUPS]; - int test_val_map_fixup[MAX_FIXUPS]; + int fixup_map1[MAX_FIXUPS]; + int fixup_map2[MAX_FIXUPS]; + int fixup_prog[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -44,15 +54,12 @@ struct bpf_test { * actually the end of the structure. */ #define MAX_ENTRIES 11 + struct test_val { - unsigned index; + unsigned int index; int foo[MAX_ENTRIES]; }; -struct other_val { - unsigned int action[32]; -}; - static struct bpf_test tests[] = { { "add+sub+mul", @@ -287,10 +294,11 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup = {2}, + .fixup_map1 = { 2 }, .errstr = "invalid indirect read from stack", .result = REJECT, }, @@ -307,8 +315,10 @@ static struct bpf_test tests[] = { { "invalid argument register", .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_cgroup_classid), BPF_EXIT_INSN(), }, .errstr = "R1 !read_ok", @@ -319,9 +329,11 @@ static struct bpf_test tests[] = { "non-invalid argument register", .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_cgroup_classid), BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_cgroup_classid), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -332,10 +344,8 @@ static struct bpf_test tests[] = { .insns = { /* spill R1(ctx) into stack */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* fill it back into R2 */ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), - /* should be able to access R0 = *(R2 + 8) */ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), @@ -363,13 +373,10 @@ static struct bpf_test tests[] = { .insns = { /* spill R1(ctx) into stack */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* mess up with R1 pointer on stack */ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 should fail */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), }, .errstr_unpriv = "attempt to corrupt spilled", @@ -483,7 +490,8 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, .errstr = "fd 0 is not pointing to valid bpf_map", @@ -496,11 +504,12 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr = "R0 invalid mem access 'map_value_or_null'", .result = REJECT, }, @@ -511,12 +520,13 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr = "misaligned access", .result = REJECT, }, @@ -527,14 +537,15 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr = "R0 invalid mem access", .errstr_unpriv = "R0 leaks addr", .result = REJECT, @@ -619,10 +630,11 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, - .fixup = {24}, + .fixup_map1 = { 24 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -763,7 +775,8 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), @@ -771,7 +784,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, pkt_type)), BPF_EXIT_INSN(), }, - .fixup = {4}, + .fixup_map1 = { 4 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -787,13 +800,14 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -12), }, - .fixup = {6}, + .fixup_map1 = { 6 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -810,13 +824,14 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -13), }, - .fixup = {7}, + .fixup_map1 = { 7 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1039,7 +1054,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), BPF_MOV64_IMM(BPF_REG_2, 8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_trace_printk), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -1056,11 +1072,12 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr_unpriv = "R4 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -1072,11 +1089,12 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr = "invalid indirect read from stack off -8+0 size 8", .result = REJECT, }, @@ -1128,18 +1146,125 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "unpriv: spill/fill of ctx", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "unpriv: spill/fill of ctx 2", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_hash_recalc), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of ctx 3", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_hash_recalc), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=fp expected=ctx", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of ctx 4", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, + BPF_REG_0, -8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_hash_recalc), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=inv expected=ctx", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers ldx", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + -(__s32)offsetof(struct bpf_perf_event_data, + sample_period) - 8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, + offsetof(struct bpf_perf_event_data, + sample_period)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { "unpriv: write pointer into map elem value", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {3}, + .fixup_map1 = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -1160,11 +1285,12 @@ static struct bpf_test tests[] = { .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .prog_array_fixup = {1}, + .fixup_prog = { 1 }, .errstr_unpriv = "R3 leaks addr into helper", .result_unpriv = REJECT, .result = ACCEPT, @@ -1178,7 +1304,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {1}, + .fixup_map1 = { 1 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -1194,6 +1320,19 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "unpriv: spill/fill frame pointer", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "frame pointer is read only", + .result = REJECT, + }, + { "unpriv: cmp of frame pointer", .insns = { BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0), @@ -1254,7 +1393,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1270,7 +1410,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, ~0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1286,7 +1427,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1302,7 +1444,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1318,7 +1461,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1331,13 +1475,14 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_2, 4), BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, @@ -1354,11 +1499,12 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_2, 4), BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), /* fill ctx into R0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), @@ -1373,15 +1519,16 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_2, 4), BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill ctx into R3 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, @@ -1402,15 +1549,16 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_2, 4), BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), /* spill ctx from R1 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), /* spill ctx from R1 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), /* spill ctx from R1 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), /* fill ctx into R0 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), /* fill ctx into R2 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), /* fill data into R3 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, @@ -1430,7 +1578,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1446,7 +1595,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1462,7 +1612,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1478,7 +1629,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1494,7 +1646,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1510,7 +1663,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1526,7 +1680,8 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), BPF_MOV64_IMM(BPF_REG_4, 512), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, @@ -1747,11 +1902,12 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {5}, + .fixup_map1 = { 5 }, .result_unpriv = ACCEPT, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, @@ -1762,11 +1918,12 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {1}, + .fixup_map1 = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -1789,11 +1946,12 @@ static struct bpf_test tests[] = { BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {11}, + .fixup_map1 = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -1810,11 +1968,12 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {7}, + .fixup_map1 = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -1831,11 +1990,12 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {6}, + .fixup_map1 = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -1853,11 +2013,12 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {5}, + .fixup_map1 = { 5 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -1867,11 +2028,12 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {1}, + .fixup_map1 = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -1894,11 +2056,12 @@ static struct bpf_test tests[] = { BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {11}, + .fixup_map1 = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -1915,11 +2078,12 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {7}, + .fixup_map1 = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -1936,11 +2100,12 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup = {6}, + .fixup_map1 = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -1959,7 +2124,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_4, 42), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_store_bytes), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -1979,7 +2145,8 @@ static struct bpf_test tests[] = { BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_4, 4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2003,7 +2170,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2026,7 +2194,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2050,7 +2219,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2074,7 +2244,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2098,7 +2269,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2122,7 +2294,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2146,7 +2319,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2169,7 +2343,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -2184,12 +2359,14 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -2201,16 +2378,18 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), BPF_MOV64_IMM(BPF_REG_1, 4), BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, - .errstr_unpriv = "R0 leaks addr", + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -2221,17 +2400,19 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, - .errstr_unpriv = "R0 leaks addr", + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -2242,7 +2423,8 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), @@ -2252,11 +2434,12 @@ static struct bpf_test tests[] = { BPF_MOV32_IMM(BPF_REG_1, 0), BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, - .errstr_unpriv = "R0 leaks addr", + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -2267,13 +2450,14 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, .errstr = "invalid access to map value, value_size=48 off=48 size=8", .result = REJECT, }, @@ -2284,16 +2468,20 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is outside of the array range", + .result_unpriv = REJECT, .result = REJECT, }, { @@ -2303,16 +2491,20 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result_unpriv = REJECT, .result = REJECT, }, { @@ -2322,7 +2514,8 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), @@ -2330,11 +2523,14 @@ static struct bpf_test tests[] = { BPF_MOV32_IMM(BPF_REG_1, 0), BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result_unpriv = REJECT, .result = REJECT, }, { @@ -2344,7 +2540,8 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), @@ -2352,11 +2549,14 @@ static struct bpf_test tests[] = { BPF_MOV32_IMM(BPF_REG_1, 0), BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3}, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .result_unpriv = REJECT, .result = REJECT, }, { @@ -2366,164 +2566,271 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .test_val_map_fixup = {3, 11}, + .fixup_map2 = { 3, 11 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result_unpriv = REJECT, + .result = REJECT, + }, + { + "multiple registers share map_lookup_elem result", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "invalid memory access with multiple map_lookup_elem calls", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, .result = REJECT, + .errstr = "R4 !read_ok", + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "valid indirect map_lookup_elem access with 2nd lookup in branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_2, 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS }, }; -static int probe_filter_length(struct bpf_insn *fp) +static int probe_filter_length(const struct bpf_insn *fp) { - int len = 0; + int len; for (len = MAX_INSNS - 1; len > 0; --len) if (fp[len].code != 0 || fp[len].imm != 0) break; - return len + 1; } -static int create_map(size_t val_size, int num) +static int create_map(uint32_t size_value, uint32_t max_elem) { - int map_fd; + int fd; - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(long long), val_size, num, 0); - if (map_fd < 0) - printf("failed to create map '%s'\n", strerror(errno)); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(long long), + size_value, max_elem, BPF_F_NO_PREALLOC); + if (fd < 0) + printf("Failed to create hash map '%s'!\n", strerror(errno)); - return map_fd; + return fd; } static int create_prog_array(void) { - int map_fd; + int fd; - map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, - sizeof(int), sizeof(int), 4, 0); - if (map_fd < 0) - printf("failed to create prog_array '%s'\n", strerror(errno)); + fd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), + sizeof(int), 4, 0); + if (fd < 0) + printf("Failed to create prog array '%s'!\n", strerror(errno)); - return map_fd; + return fd; } -static int test(void) +static char bpf_vlog[32768]; + +static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, + int *fd_f1, int *fd_f2, int *fd_f3) { - int prog_fd, i, pass_cnt = 0, err_cnt = 0; - bool unpriv = geteuid() != 0; + int *fixup_map1 = test->fixup_map1; + int *fixup_map2 = test->fixup_map2; + int *fixup_prog = test->fixup_prog; + + /* Allocating HTs with 1 elem is fine here, since we only test + * for verifier and not do a runtime lookup, so the only thing + * that really matters is value size in this case. + */ + if (*fixup_map1) { + *fd_f1 = create_map(sizeof(long long), 1); + do { + prog[*fixup_map1].imm = *fd_f1; + fixup_map1++; + } while (*fixup_map1); + } - for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct bpf_insn *prog = tests[i].insns; - int prog_type = tests[i].prog_type; - int prog_len = probe_filter_length(prog); - int *fixup = tests[i].fixup; - int *prog_array_fixup = tests[i].prog_array_fixup; - int *test_val_map_fixup = tests[i].test_val_map_fixup; - int expected_result; - const char *expected_errstr; - int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1; + if (*fixup_map2) { + *fd_f2 = create_map(sizeof(struct test_val), 1); + do { + prog[*fixup_map2].imm = *fd_f2; + fixup_map2++; + } while (*fixup_map2); + } - if (*fixup) { - map_fd = create_map(sizeof(long long), 1024); + if (*fixup_prog) { + *fd_f3 = create_prog_array(); + do { + prog[*fixup_prog].imm = *fd_f3; + fixup_prog++; + } while (*fixup_prog); + } +} - do { - prog[*fixup].imm = map_fd; - fixup++; - } while (*fixup); - } - if (*prog_array_fixup) { - prog_array_fd = create_prog_array(); +static void do_test_single(struct bpf_test *test, bool unpriv, + int *passes, int *errors) +{ + struct bpf_insn *prog = test->insns; + int prog_len = probe_filter_length(prog); + int prog_type = test->prog_type; + int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; + int fd_prog, expected_ret; + const char *expected_err; - do { - prog[*prog_array_fixup].imm = prog_array_fd; - prog_array_fixup++; - } while (*prog_array_fixup); - } - if (*test_val_map_fixup) { - /* Unprivileged can't create a hash map.*/ - if (unpriv) - continue; - test_val_map_fd = create_map(sizeof(struct test_val), - 256); - do { - prog[*test_val_map_fixup].imm = test_val_map_fd; - test_val_map_fixup++; - } while (*test_val_map_fixup); - } + do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); - printf("#%d %s ", i, tests[i].descr); + fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len * sizeof(struct bpf_insn), + "GPL", bpf_vlog, sizeof(bpf_vlog)); - prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len * sizeof(struct bpf_insn), - "GPL", 0); + expected_ret = unpriv && test->result_unpriv != UNDEF ? + test->result_unpriv : test->result; + expected_err = unpriv && test->errstr_unpriv ? + test->errstr_unpriv : test->errstr; + if (expected_ret == ACCEPT) { + if (fd_prog < 0) { + printf("FAIL\nFailed to load prog '%s'!\n", + strerror(errno)); + goto fail_log; + } + } else { + if (fd_prog >= 0) { + printf("FAIL\nUnexpected success to load!\n"); + goto fail_log; + } + if (!strstr(bpf_vlog, expected_err)) { + printf("FAIL\nUnexpected error message!\n"); + goto fail_log; + } + } - if (unpriv && tests[i].result_unpriv != UNDEF) - expected_result = tests[i].result_unpriv; - else - expected_result = tests[i].result; + (*passes)++; + printf("OK\n"); +close_fds: + close(fd_prog); + close(fd_f1); + close(fd_f2); + close(fd_f3); + sched_yield(); + return; +fail_log: + (*errors)++; + printf("%s", bpf_vlog); + goto close_fds; +} - if (unpriv && tests[i].errstr_unpriv) - expected_errstr = tests[i].errstr_unpriv; - else - expected_errstr = tests[i].errstr; +static int do_test(bool unpriv, unsigned int from, unsigned int to) +{ + int i, passes = 0, errors = 0; - if (expected_result == ACCEPT) { - if (prog_fd < 0) { - printf("FAIL\nfailed to load prog '%s'\n", - strerror(errno)); - printf("%s", bpf_log_buf); - err_cnt++; - goto fail; - } - } else { - if (prog_fd >= 0) { - printf("FAIL\nunexpected success to load\n"); - printf("%s", bpf_log_buf); - err_cnt++; - goto fail; - } - if (strstr(bpf_log_buf, expected_errstr) == 0) { - printf("FAIL\nunexpected error message: %s", - bpf_log_buf); - err_cnt++; - goto fail; - } - } + for (i = from; i < to; i++) { + struct bpf_test *test = &tests[i]; - pass_cnt++; - printf("OK\n"); -fail: - if (map_fd >= 0) - close(map_fd); - if (prog_array_fd >= 0) - close(prog_array_fd); - if (test_val_map_fd >= 0) - close(test_val_map_fd); - close(prog_fd); + /* Program types that are not supported by non-root we + * skip right away. + */ + if (unpriv && test->prog_type) + continue; + printf("#%d %s ", i, test->descr); + do_test_single(test, unpriv, &passes, &errors); } - printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); - return 0; + printf("Summary: %d PASSED, %d FAILED\n", passes, errors); + return errors ? -errors : 0; } -int main(void) +int main(int argc, char **argv) { - struct rlimit r = {1 << 20, 1 << 20}; + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + struct rlimit rlim = { 1 << 20, 1 << 20 }; + unsigned int from = 0, to = ARRAY_SIZE(tests); + bool unpriv = geteuid() != 0; + + if (argc == 3) { + unsigned int l = atoi(argv[argc - 2]); + unsigned int u = atoi(argv[argc - 1]); + + if (l < to && u < to) { + from = l; + to = u + 1; + } + } else if (argc == 2) { + unsigned int t = atoi(argv[argc - 1]); + + if (t < to) { + from = t; + to = t + 1; + } + } - setrlimit(RLIMIT_MEMLOCK, &r); - return test(); + setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf); + return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0840684deb7d..afe109e5508a 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,4 +3,5 @@ psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu +reuseport_bpf_numa reuseport_dualstack diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0e5340742620..e24e4c82542e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,14 +1,17 @@ # Makefile for net selftests -CFLAGS = -Wall -O2 -g - +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack +NET_PROGS = socket +NET_PROGS += psock_fanout psock_tpacket +NET_PROGS += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa +NET_PROGS += reuseport_dualstack all: $(NET_PROGS) +reuseport_bpf_numa: LDFLAGS += -lnuma %: %.c - $(CC) $(CFLAGS) -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh TEST_FILES := $(NET_PROGS) diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c new file mode 100644 index 000000000000..6f20bc9ff627 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -0,0 +1,255 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. Same test as + * in reuseport_bpf_cpu, only as one socket per NUMA node. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> +#include <numa.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + static char bpf_log_buf[65536]; + static const char bpf_license[] = ""; + + int bpf_fd; + const struct bpf_insn prog[] = { + /* R0 = bpf_get_numa_node_id() */ + { BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id }, + /* return R0 */ + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; + attr.log_size = sizeof(bpf_log_buf); + attr.log_level = 1; + + bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (bpf_fd < 0) + error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf); + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, + sizeof(bpf_fd))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF"); + + close(bpf_fd); +} + +static void send_from_node(int node_id, int family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + if (numa_run_on_node(node_id) < 0) + error(1, errno, "failed to pin to node"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send node %d, receive socket %d\n", node_id, i); + if (node_id != i) + error(1, 0, "node id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, node; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (node = 0; node < len; ++node) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[node]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (node = 0; node < len; ++node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + /* Reverse iterate */ + for (node = len - 1; node >= 0; --node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + close(epfd); + for (node = 0; node < len; ++node) + close(rcv_fd[node]); +} + +int main(void) +{ + int *rcv_fd, nodes; + + if (numa_available() < 0) + error(1, errno, "no numa api support"); + + nodes = numa_max_node() + 1; + + rcv_fd = calloc(nodes, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} |