diff options
315 files changed, 9087 insertions, 5598 deletions
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index b519503be51a..4717bc24eada 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -4,19 +4,24 @@ This file provides information on what the device node for the Ethernet AVB interface contains. Required properties: -- compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC. - "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC. - "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC. - "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC. - "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC. - "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC. - "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC. - "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface. - "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface. +- compatible: Must contain one or more of the following: + - "renesas,etheravb-r8a7743" for the R8A7743 SoC. + - "renesas,etheravb-r8a7790" for the R8A7790 SoC. + - "renesas,etheravb-r8a7791" for the R8A7791 SoC. + - "renesas,etheravb-r8a7792" for the R8A7792 SoC. + - "renesas,etheravb-r8a7793" for the R8A7793 SoC. + - "renesas,etheravb-r8a7794" for the R8A7794 SoC. + - "renesas,etheravb-rcar-gen2" as a fallback for the above + R-Car Gen2 and RZ/G1 devices. - When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first - followed by the generic version. + - "renesas,etheravb-r8a7795" for the R8A7795 SoC. + - "renesas,etheravb-r8a7796" for the R8A7796 SoC. + - "renesas,etheravb-rcar-gen3" as a fallback for the above + R-Car Gen3 devices. + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first followed by + the generic version. - reg: offset and length of (1) the register block and (2) the stream buffer. - interrupts: A list of interrupt-specifiers, one for each entry in diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt new file mode 100644 index 000000000000..38f9ec076743 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -0,0 +1,55 @@ +XILINX AXI ETHERNET Device Tree Bindings +-------------------------------------------------------- + +Also called AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core +provides connectivity to an external ethernet PHY supporting different +interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two +segments of memory for buffering TX and RX, as well as the capability of +offloading TX/RX checksum calculation off the processor. + +Management configuration is done through the AXI interface, while payload is +sent and received through means of an AXI DMA controller. This driver +includes the DMA driver code, so this driver is incompatible with AXI DMA +driver. + +For more details about mdio please refer phy.txt file in the same directory. + +Required properties: +- compatible : Must be one of "xlnx,axi-ethernet-1.00.a", + "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a" +- reg : Address and length of the IO space. +- interrupts : Should be a list of two interrupt, TX and RX. +- phy-handle : Should point to the external phy device. + See ethernet.txt file in the same directory. +- xlnx,rxmem : Set to allocated memory buffer for Rx/Tx in the hardware + +Optional properties: +- phy-mode : See ethernet.txt +- xlnx,phy-type : Deprecated, do not use, but still accepted in preference + to phy-mode. +- xlnx,txcsum : 0 or empty for disabling TX checksum offload, + 1 to enable partial TX checksum offload, + 2 to enable full TX checksum offload +- xlnx,rxcsum : Same values as xlnx,txcsum but for RX checksum offload + +Example: + axi_ethernet_eth: ethernet@40c00000 { + compatible = "xlnx,axi-ethernet-1.00.a"; + device_type = "network"; + interrupt-parent = <µblaze_0_axi_intc>; + interrupts = <2 0>; + phy-mode = "mii"; + reg = <0x40c00000 0x40000>; + xlnx,rxcsum = <0x2>; + xlnx,rxmem = <0x800>; + xlnx,txcsum = <0x2>; + phy-handle = <&phy0>; + axi_ethernetlite_0_mdio: mdio { + #address-cells = <1>; + #size-cells = <0>; + phy0: phy@0 { + device_type = "ethernet-phy"; + reg = <1>; + }; + }; + }; diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 974ab47ae53a..84c9b8cee780 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -353,12 +353,7 @@ tcp_l3mdev_accept - BOOLEAN compiled with CONFIG_NET_L3_MASTER_DEV. tcp_low_latency - BOOLEAN - If set, the TCP stack makes decisions that prefer lower - latency as opposed to higher throughput. By default, this - option is not set meaning that higher throughput is preferred. - An example of an application where this default should be - changed would be a Beowulf compute cluster. - Default: 0 + This is a legacy option, it has no effect anymore. tcp_max_orphans - INTEGER Maximal number of TCP sockets not attached to any user file handle, @@ -1291,8 +1286,7 @@ tag - INTEGER xfrm4_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. igmp_link_local_mcast_reports - BOOLEAN Enable IGMP reports for link local multicast groups in the @@ -1778,8 +1772,7 @@ ratelimit - INTEGER xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. IPv6 Update by: diff --git a/MAINTAINERS b/MAINTAINERS index 567343b8ffaa..8c5707f5860a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5101,6 +5101,7 @@ F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h F: include/linux/platform_data/mdio-gpio.h +F: include/linux/platform_data/mdio-bcm-unimac.h F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h F: include/uapi/linux/mii.h diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 906705e5f776..acf16c323e38 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -2374,7 +2374,7 @@ MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles"); /********** module entry **********/ -static struct pci_device_id amb_pci_tbl[] = { +static const struct pci_device_id amb_pci_tbl[] = { { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 }, { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 }, { 0, } diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index b042ec458544..ce47eb17901d 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2292,7 +2292,7 @@ err_disable: } -static struct pci_device_id eni_pci_tbl[] = { +static const struct pci_device_id eni_pci_tbl[] = { { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_FPGA), 0 /* FPGA */ }, { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_ASIC), 1 /* ASIC */ }, { 0, } diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 22dcab952a24..6b6368a56526 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -2030,7 +2030,7 @@ static void firestream_remove_one(struct pci_dev *pdev) func_exit (); } -static struct pci_device_id firestream_pci_tbl[] = { +static const struct pci_device_id firestream_pci_tbl[] = { { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS50), FS_IS50}, { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS155), FS_IS155}, { 0, } diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f0433adcd8fc..f8b7e86907cc 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2757,7 +2757,7 @@ static void fore200e_pca_remove_one(struct pci_dev *pci_dev) } -static struct pci_device_id fore200e_pca_tbl[] = { +static const struct pci_device_id fore200e_pca_tbl[] = { { PCI_VENDOR_ID_FORE, PCI_DEVICE_ID_FORE_PCA200E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (unsigned long) &fore200e_bus[0] }, { 0, } diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 37ee21c5a5ca..8f6156d475d1 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -2851,7 +2851,7 @@ MODULE_PARM_DESC(irq_coalesce, "use interrupt coalescing (default 1)"); module_param(sdh, bool, 0); MODULE_PARM_DESC(sdh, "use SDH framing (default 0)"); -static struct pci_device_id he_pci_tbl[] = { +static const struct pci_device_id he_pci_tbl[] = { { PCI_VDEVICE(FORE, PCI_DEVICE_ID_FORE_HE), 0 }, { 0, } }; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 0f18480b33b5..7e76b35f422c 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2867,7 +2867,7 @@ MODULE_PARM_DESC(max_tx_size, "maximum size of TX AAL5 frames"); MODULE_PARM_DESC(max_rx_size, "maximum size of RX AAL5 frames"); MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles"); -static struct pci_device_id hrz_pci_tbl[] = { +static const struct pci_device_id hrz_pci_tbl[] = { { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_HORIZON, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { 0, } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 60bacba03d17..b7a168c46692 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3725,7 +3725,7 @@ err_out_disable_pdev: return err; } -static struct pci_device_id idt77252_pci_tbl[] = +static const struct pci_device_id idt77252_pci_tbl[] = { { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77252), 0 }, { 0, } diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index a4fa6c82261e..fc72b763fdd7 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -3266,7 +3266,7 @@ static void ia_remove_one(struct pci_dev *pdev) kfree(iadev); } -static struct pci_device_id ia_pci_tbl[] = { +static const struct pci_device_id ia_pci_tbl[] = { { PCI_VENDOR_ID_IPHASE, 0x0008, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_IPHASE, 0x0009, PCI_ANY_ID, PCI_ANY_ID, }, { 0,} diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 1a9bc51284b0..2351dad78ff5 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2589,7 +2589,7 @@ static int lanai_init_one(struct pci_dev *pci, return result; } -static struct pci_device_id lanai_pci_tbl[] = { +static const struct pci_device_id lanai_pci_tbl[] = { { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAI2) }, { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAIHB) }, { 0, } /* terminal entry */ diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index d879f3bca107..9588d80f318e 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -253,7 +253,7 @@ static void nicstar_remove_one(struct pci_dev *pcidev) kfree(card); } -static struct pci_device_id nicstar_pci_tbl[] = { +static const struct pci_device_id nicstar_pci_tbl[] = { { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77201), 0 }, {0,} /* terminate list */ }; diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index c8f2ca6d8b29..585984ee7dbd 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1476,7 +1476,7 @@ static void fpga_remove(struct pci_dev *dev) kfree(card); } -static struct pci_device_id fpga_pci_tbl[] = { +static const struct pci_device_id fpga_pci_tbl[] = { { 0x10ee, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { 0, } }; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 07bdd51b3b9a..1ef67db03c8e 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1642,7 +1642,7 @@ out_free: MODULE_LICENSE("GPL"); -static struct pci_device_id zatm_pci_tbl[] = { +static const struct pci_device_id zatm_pci_tbl[] = { { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER }, { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 }, { 0, } diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index 19982a4a9bba..18f5ed082f41 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_BNXT_RE tristate "Broadcom Netxtreme HCA support" depends on ETHERNET && NETDEVICES && PCI && INET && DCB + depends on MAY_USE_DEVLINK select NET_VENDOR_BROADCOM select BNXT ---help--- diff --git a/drivers/isdn/hardware/eicon/divacapi.h b/drivers/isdn/hardware/eicon/divacapi.h index a315a2914d70..c4868a0d82f4 100644 --- a/drivers/isdn/hardware/eicon/divacapi.h +++ b/drivers/isdn/hardware/eicon/divacapi.h @@ -26,15 +26,7 @@ /*#define DEBUG */ - - - - - - - - - +#include <linux/types.h> #define IMPLEMENT_DTMF 1 #define IMPLEMENT_LINE_INTERCONNECT2 1 @@ -82,8 +74,6 @@ #define CODEC_PERMANENT 0x02 #define ADV_VOICE 0x03 #define MAX_CIP_TYPES 5 /* kind of CIP types for group optimization */ -#define C_IND_MASK_DWORDS ((MAX_APPL + 32) >> 5) - #define FAX_CONNECT_INFO_BUFFER_SIZE 256 #define NCPI_BUFFER_SIZE 256 @@ -265,8 +255,8 @@ struct _PLCI { word ncci_ring_list; byte inc_dis_ncci_table[MAX_CHANNELS_PER_PLCI]; t_std_internal_command internal_command_queue[MAX_INTERNAL_COMMAND_LEVELS]; - dword c_ind_mask_table[C_IND_MASK_DWORDS]; - dword group_optimization_mask_table[C_IND_MASK_DWORDS]; + DECLARE_BITMAP(c_ind_mask_table, MAX_APPL); + DECLARE_BITMAP(group_optimization_mask_table, MAX_APPL); byte RBuffer[200]; dword msg_in_queue[MSG_IN_QUEUE_SIZE/sizeof(dword)]; API_SAVE saved_msg; diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 3b11422b1cce..eadd1ed1e014 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -23,9 +23,7 @@ * */ - - - +#include <linux/bitmap.h> #include "platform.h" #include "di_defs.h" @@ -35,19 +33,9 @@ #include "mdm_msg.h" #include "divasync.h" - - #define FILE_ "MESSAGE.C" #define dprintf - - - - - - - - /*------------------------------------------------------------------*/ /* This is options supported for all adapters that are server by */ /* XDI driver. Allo it is not necessary to ask it from every adapter*/ @@ -72,9 +60,6 @@ static dword diva_xdi_extended_features = 0; /*------------------------------------------------------------------*/ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci); -static void set_group_ind_mask(PLCI *plci); -static void clear_group_ind_mask_bit(PLCI *plci, word b); -static byte test_group_ind_mask_bit(PLCI *plci, word b); void AutomaticLaw(DIVA_CAPI_ADAPTER *); word CapiRelease(word); word CapiRegister(word); @@ -1087,106 +1072,6 @@ static void plci_remove(PLCI *plci) } /*------------------------------------------------------------------*/ -/* Application Group function helpers */ -/*------------------------------------------------------------------*/ - -static void set_group_ind_mask(PLCI *plci) -{ - word i; - - for (i = 0; i < C_IND_MASK_DWORDS; i++) - plci->group_optimization_mask_table[i] = 0xffffffffL; -} - -static void clear_group_ind_mask_bit(PLCI *plci, word b) -{ - plci->group_optimization_mask_table[b >> 5] &= ~(1L << (b & 0x1f)); -} - -static byte test_group_ind_mask_bit(PLCI *plci, word b) -{ - return ((plci->group_optimization_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0); -} - -/*------------------------------------------------------------------*/ -/* c_ind_mask operations for arbitrary MAX_APPL */ -/*------------------------------------------------------------------*/ - -static void clear_c_ind_mask(PLCI *plci) -{ - word i; - - for (i = 0; i < C_IND_MASK_DWORDS; i++) - plci->c_ind_mask_table[i] = 0; -} - -static byte c_ind_mask_empty(PLCI *plci) -{ - word i; - - i = 0; - while ((i < C_IND_MASK_DWORDS) && (plci->c_ind_mask_table[i] == 0)) - i++; - return (i == C_IND_MASK_DWORDS); -} - -static void set_c_ind_mask_bit(PLCI *plci, word b) -{ - plci->c_ind_mask_table[b >> 5] |= (1L << (b & 0x1f)); -} - -static void clear_c_ind_mask_bit(PLCI *plci, word b) -{ - plci->c_ind_mask_table[b >> 5] &= ~(1L << (b & 0x1f)); -} - -static byte test_c_ind_mask_bit(PLCI *plci, word b) -{ - return ((plci->c_ind_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0); -} - -static void dump_c_ind_mask(PLCI *plci) -{ - word i, j, k; - dword d; - char *p; - char buf[40]; - - for (i = 0; i < C_IND_MASK_DWORDS; i += 4) - { - p = buf + 36; - *p = '\0'; - for (j = 0; j < 4; j++) - { - if (i + j < C_IND_MASK_DWORDS) - { - d = plci->c_ind_mask_table[i + j]; - for (k = 0; k < 8; k++) - { - *(--p) = hex_asc_lo(d); - d >>= 4; - } - } - else if (i != 0) - { - for (k = 0; k < 8; k++) - *(--p) = ' '; - } - *(--p) = ' '; - } - dbug(1, dprintf("c_ind_mask =%s", (char *) p)); - } -} - - - - - -#define dump_plcis(a) - - - -/*------------------------------------------------------------------*/ /* translation function for each message */ /*------------------------------------------------------------------*/ @@ -1457,13 +1342,13 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, return 1; } else if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); - dump_c_ind_mask(plci); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); Reject = GET_WORD(parms[0].info); dbug(1, dprintf("Reject=0x%x", Reject)); if (Reject) { - if (c_ind_mask_empty(plci)) + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if ((Reject & 0xff00) == 0x3400) { @@ -1553,11 +1438,8 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, sig_req(plci, CALL_RES, 0); } - for (i = 0; i < max_appl; i++) { - if (test_c_ind_mask_bit(plci, i)) { - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); - } - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); } } return 1; @@ -1584,13 +1466,10 @@ static byte disconnect_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a, { if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); plci->appl = appl; - for (i = 0; i < max_appl; i++) - { - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); plci->State = OUTG_DIS_PENDING; } if (plci->Sig.Id && plci->appl) @@ -1634,7 +1513,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, { /* clear ind mask bit, just in case of collsion of */ /* DISCONNECT_IND and CONNECT_RES */ - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); ncci_free_receive_buffers(plci, 0); if (plci_remove_check(plci)) { @@ -1642,7 +1521,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, } if (plci->State == INC_DIS_PENDING || plci->State == SUSPENDING) { - if (c_ind_mask_empty(plci)) { + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (plci->State != SUSPENDING) plci->State = IDLE; dbug(1, dprintf("chs=%d", plci->channels)); if (!plci->channels) { @@ -3351,13 +3230,11 @@ static byte select_b_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a, } plci->State = INC_CON_CONNECTED_ALERT; plci->appl = appl; - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); - dump_c_ind_mask(plci); - for (i = 0; i < max_appl; i++) /* disconnect the other appls */ - { /* its quasi a connect */ - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); - } + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); + /* disconnect the other appls its quasi a connect */ + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); } api_save_msg(msg, "s", &plci->saved_msg); @@ -5692,19 +5569,17 @@ static void sig_ind(PLCI *plci) cip = find_cip(a, parms[4], parms[6]); cip_mask = 1L << cip; dbug(1, dprintf("cip=%d,cip_mask=%lx", cip, cip_mask)); - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); if (!remove_started && !a->adapter_disabled) { - set_c_ind_mask_bit(plci, MAX_APPL); group_optimization(a, plci); - for (i = 0; i < max_appl; i++) { + for_each_set_bit(i, plci->group_optimization_mask_table, max_appl) { if (application[i].Id && (a->CIP_Mask[i] & 1 || a->CIP_Mask[i] & cip_mask) - && CPN_filter_ok(parms[0], a, i) - && test_group_ind_mask_bit(plci, i)) { + && CPN_filter_ok(parms[0], a, i)) { dbug(1, dprintf("storedcip_mask[%d]=0x%lx", i, a->CIP_Mask[i])); - set_c_ind_mask_bit(plci, i); - dump_c_ind_mask(plci); + __set_bit(i, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); plci->State = INC_CON_PENDING; plci->call_dir = (plci->call_dir & ~(CALL_DIR_OUT | CALL_DIR_ORIGINATE)) | CALL_DIR_IN | CALL_DIR_ANSWER; @@ -5750,10 +5625,9 @@ static void sig_ind(PLCI *plci) SendMultiIE(plci, Id, multi_pi_parms, PI, 0x210, true)); } } - clear_c_ind_mask_bit(plci, MAX_APPL); - dump_c_ind_mask(plci); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); } - if (c_ind_mask_empty(plci)) { + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { sig_req(plci, HANGUP, 0); send_req(plci); plci->State = IDLE; @@ -5994,13 +5868,13 @@ static void sig_ind(PLCI *plci) break; case RESUME: - clear_c_ind_mask_bit(plci, (word)(plci->appl->Id - 1)); + __clear_bit(plci->appl->Id - 1, plci->c_ind_mask_table); PUT_WORD(&resume_cau[4], GOOD); sendf(plci->appl, _FACILITY_I, Id, 0, "ws", (word)3, resume_cau); break; case SUSPEND: - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); if (plci->NL.Id && !plci->nl_remove_id) { mixer_remove(plci); @@ -6037,15 +5911,12 @@ static void sig_ind(PLCI *plci) if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - for (i = 0; i < max_appl; i++) - { - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); } else { - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); } if (!plci->appl) { @@ -6055,7 +5926,7 @@ static void sig_ind(PLCI *plci) a->listen_active--; } plci->State = INC_DIS_PENDING; - if (c_ind_mask_empty(plci)) + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { plci->State = IDLE; if (plci->NL.Id && !plci->nl_remove_id) @@ -6341,14 +6212,10 @@ static void SendInfo(PLCI *plci, dword Id, byte **parms, byte iesent) || Info_Number == DSP || Info_Number == UUI) { - for (j = 0; j < max_appl; j++) - { - if (test_c_ind_mask_bit(plci, j)) - { - dbug(1, dprintf("Ovl_Ind")); - iesent = true; - sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element); - } + for_each_set_bit(j, plci->c_ind_mask_table, max_appl) { + dbug(1, dprintf("Ovl_Ind")); + iesent = true; + sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element); } } } /* all other signalling states */ @@ -6416,14 +6283,10 @@ static byte SendMultiIE(PLCI *plci, dword Id, byte **parms, byte ie_type, } else if (!plci->appl && Info_Number) { /* overlap receiving broadcast */ - for (j = 0; j < max_appl; j++) - { - if (test_c_ind_mask_bit(plci, j)) - { - iesent = true; - dbug(1, dprintf("Mlt_Ovl_Ind")); - sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element); - } + for_each_set_bit(j, plci->c_ind_mask_table, max_appl) { + iesent = true; + dbug(1, dprintf("Mlt_Ovl_Ind")); + sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element); } } /* all other signalling states */ else if (Info_Number @@ -7270,7 +7133,6 @@ static word get_plci(DIVA_CAPI_ADAPTER *a) word i, j; PLCI *plci; - dump_plcis(a); for (i = 0; i < a->max_plci && a->plci[i].Id; i++); if (i == a->max_plci) { dbug(1, dprintf("get_plci: out of PLCIs")); @@ -7321,8 +7183,8 @@ static word get_plci(DIVA_CAPI_ADAPTER *a) plci->ncci_ring_list = 0; for (j = 0; j < MAX_CHANNELS_PER_PLCI; j++) plci->inc_dis_ncci_table[j] = 0; - clear_c_ind_mask(plci); - set_group_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); + bitmap_fill(plci->group_optimization_mask_table, MAX_APPL); plci->fax_connect_info_length = 0; plci->nsf_control_bits = 0; plci->ncpi_state = 0x00; @@ -9373,10 +9235,10 @@ word CapiRelease(word Id) if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - if (test_c_ind_mask_bit(plci, (word)(Id - 1))) + if (test_bit(Id - 1, plci->c_ind_mask_table)) { - clear_c_ind_mask_bit(plci, (word)(Id - 1)); - if (c_ind_mask_empty(plci)) + __clear_bit(Id - 1, plci->c_ind_mask_table); + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { sig_req(plci, HANGUP, 0); send_req(plci); @@ -9384,10 +9246,10 @@ word CapiRelease(word Id) } } } - if (test_c_ind_mask_bit(plci, (word)(Id - 1))) + if (test_bit(Id - 1, plci->c_ind_mask_table)) { - clear_c_ind_mask_bit(plci, (word)(Id - 1)); - if (c_ind_mask_empty(plci)) + __clear_bit(Id - 1, plci->c_ind_mask_table); + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (!plci->appl) { @@ -9452,7 +9314,7 @@ word CapiRelease(word Id) static word plci_remove_check(PLCI *plci) { if (!plci) return true; - if (!plci->NL.Id && c_ind_mask_empty(plci)) + if (!plci->NL.Id && bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (plci->Sig.Id == 0xff) plci->Sig.Id = 0; @@ -14735,7 +14597,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) word appl_number_group_type[MAX_APPL]; PLCI *auxplci; - set_group_ind_mask(plci); /* all APPLs within this inc. call are allowed to dial in */ + /* all APPLs within this inc. call are allowed to dial in */ + bitmap_fill(plci->group_optimization_mask_table, MAX_APPL); if (!a->group_optimization_enabled) { @@ -14771,13 +14634,12 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) if (a->plci[k].Id) { auxplci = &a->plci[k]; - if (auxplci->appl == &application[i]) /* application has a busy PLCI */ - { + if (auxplci->appl == &application[i]) { + /* application has a busy PLCI */ busy = true; dbug(1, dprintf("Appl 0x%x is busy", i + 1)); - } - else if (test_c_ind_mask_bit(auxplci, i)) /* application has an incoming call pending */ - { + } else if (test_bit(i, plci->c_ind_mask_table)) { + /* application has an incoming call pending */ busy = true; dbug(1, dprintf("Appl 0x%x has inc. call pending", i + 1)); } @@ -14826,7 +14688,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) if (appl_number_group_type[i] == appl_number_group_type[j]) { dbug(1, dprintf("Appl 0x%x is member of group 0x%x, no call", j + 1, appl_number_group_type[j])); - clear_group_ind_mask_bit(plci, j); /* disable call on other group members */ + /* disable call on other group members */ + __clear_bit(j, plci->group_optimization_mask_table); appl_number_group_type[j] = 0; /* remove disabled group member from group list */ } } @@ -14834,7 +14697,7 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) } else /* application should not get a call */ { - clear_group_ind_mask_bit(plci, i); + __clear_bit(i, plci->group_optimization_mask_table); } } diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index a306de4318d7..9375cef22420 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -311,9 +311,7 @@ module_param(ipddp_mode, int, 0); static int __init ipddp_init_module(void) { dev_ipddp = ipddp_init(); - if (IS_ERR(dev_ipddp)) - return PTR_ERR(dev_ipddp); - return 0; + return PTR_ERR_OR_ZERO(dev_ipddp); } static void __exit ipddp_cleanup_module(void) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index cbb4f8566bbe..d09b2b46ab63 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -20,7 +20,7 @@ #include <linux/if_arcnet.h> #ifdef __KERNEL__ -#include <linux/irqreturn.h> +#include <linux/interrupt.h> /* * RECON_THRESHOLD is the maximum number of RECON messages to receive diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 01cab9548785..eb7f76753c9c 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -109,7 +109,7 @@ static struct attribute *com20020_state_attrs[] = { NULL, }; -static struct attribute_group com20020_state_group = { +static const struct attribute_group com20020_state_group = { .name = NULL, .attrs = com20020_state_attrs, }; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 770623a0cc01..040b493f60ae 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -759,7 +759,7 @@ static struct attribute *per_bond_attrs[] = { NULL, }; -static struct attribute_group bonding_group = { +static const struct attribute_group bonding_group = { .name = "bonding", .attrs = per_bond_attrs, }; diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 0e0df0ba288c..f37ce0e1b603 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -1232,7 +1232,7 @@ static struct attribute *at91_sysfs_attrs[] = { NULL, }; -static struct attribute_group at91_sysfs_attr_group = { +static const struct attribute_group at91_sysfs_attr_group = { .attrs = at91_sysfs_attrs, }; diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 2ba1a81500c1..12a53c8e8e1d 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1875,7 +1875,7 @@ static struct attribute *ican3_sysfs_attrs[] = { NULL, }; -static struct attribute_group ican3_sysfs_attr_group = { +static const struct attribute_group ican3_sysfs_attr_group = { .attrs = ican3_sysfs_attrs, }; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index cd76e61f1fca..8e430d1ee297 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -20,6 +20,9 @@ #include "lan9303.h" +/* 13.2 System Control and Status Registers + * Multiply register number by 4 to get address offset. + */ #define LAN9303_CHIP_REV 0x14 # define LAN9303_CHIP_ID 0x9303 #define LAN9303_IRQ_CFG 0x15 @@ -53,6 +56,9 @@ #define LAN9303_VIRT_PHY_BASE 0x70 #define LAN9303_VIRT_SPECIAL_CTRL 0x77 +/*13.4 Switch Fabric Control and Status Registers + * Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA. + */ #define LAN9303_SW_DEV_ID 0x0000 #define LAN9303_SW_RESET 0x0001 #define LAN9303_SW_RESET_RESET BIT(0) @@ -242,7 +248,7 @@ static int lan9303_virt_phy_reg_write(struct lan9303 *chip, int regnum, u16 val) return regmap_write(chip->regmap, LAN9303_VIRT_PHY_BASE + regnum, val); } -static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip) +static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip) { int ret, i; u32 reg; @@ -262,7 +268,7 @@ static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip) return -EIO; } -static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum) +static int lan9303_indirect_phy_read(struct lan9303 *chip, int addr, int regnum) { int ret; u32 val; @@ -272,7 +278,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum) mutex_lock(&chip->indirect_mutex); - ret = lan9303_port_phy_reg_wait_for_completion(chip); + ret = lan9303_indirect_phy_wait_for_completion(chip); if (ret) goto on_error; @@ -281,7 +287,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum) if (ret) goto on_error; - ret = lan9303_port_phy_reg_wait_for_completion(chip); + ret = lan9303_indirect_phy_wait_for_completion(chip); if (ret) goto on_error; @@ -299,8 +305,8 @@ on_error: return ret; } -static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum, - unsigned int val) +static int lan9303_indirect_phy_write(struct lan9303 *chip, int addr, + int regnum, u16 val) { int ret; u32 reg; @@ -311,7 +317,7 @@ static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum, mutex_lock(&chip->indirect_mutex); - ret = lan9303_port_phy_reg_wait_for_completion(chip); + ret = lan9303_indirect_phy_wait_for_completion(chip); if (ret) goto on_error; @@ -328,6 +334,12 @@ on_error: return ret; } +const struct lan9303_phy_ops lan9303_indirect_phy_ops = { + .phy_read = lan9303_indirect_phy_read, + .phy_write = lan9303_indirect_phy_write, +}; +EXPORT_SYMBOL_GPL(lan9303_indirect_phy_ops); + static int lan9303_switch_wait_for_completion(struct lan9303 *chip) { int ret, i; @@ -427,14 +439,15 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip) * Special reg 18 of phy 3 reads as 0x0000, if 'phy_addr_sel_strap' is 0 * and the IDs are 0-1-2, else it contains something different from * 0x0000, which means 'phy_addr_sel_strap' is 1 and the IDs are 1-2-3. + * 0xffff is returned on MDIO read with no response. */ - reg = lan9303_port_phy_reg_read(chip, 3, MII_LAN911X_SPECIAL_MODES); + reg = chip->ops->phy_read(chip, 3, MII_LAN911X_SPECIAL_MODES); if (reg < 0) { dev_err(chip->dev, "Failed to detect phy config: %d\n", reg); return reg; } - if (reg != 0) + if ((reg != 0) && (reg != 0xffff)) chip->phy_addr_sel_strap = 1; else chip->phy_addr_sel_strap = 0; @@ -719,7 +732,7 @@ static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum) if (phy > phy_base + 2) return -ENODEV; - return lan9303_port_phy_reg_read(chip, phy, regnum); + return chip->ops->phy_read(chip, phy, regnum); } static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, @@ -733,7 +746,7 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, if (phy > phy_base + 2) return -ENODEV; - return lan9303_phy_reg_write(chip, phy, regnum, val); + return chip->ops->phy_write(chip, phy, regnum, val); } static int lan9303_port_enable(struct dsa_switch *ds, int port, @@ -766,13 +779,13 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port, switch (port) { case 1: lan9303_disable_packet_processing(chip, LAN9303_PORT_1_OFFSET); - lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 1, - MII_BMCR, BMCR_PDOWN); + lan9303_phy_write(ds, chip->phy_addr_sel_strap + 1, + MII_BMCR, BMCR_PDOWN); break; case 2: lan9303_disable_packet_processing(chip, LAN9303_PORT_2_OFFSET); - lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 2, - MII_BMCR, BMCR_PDOWN); + lan9303_phy_write(ds, chip->phy_addr_sel_strap + 2, + MII_BMCR, BMCR_PDOWN); break; default: dev_dbg(chip->dev, diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index d1512dad2d90..4d8be555ff4d 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -2,6 +2,15 @@ #include <linux/device.h> #include <net/dsa.h> +struct lan9303; + +struct lan9303_phy_ops { + /* PHY 1 and 2 access*/ + int (*phy_read)(struct lan9303 *chip, int port, int regnum); + int (*phy_write)(struct lan9303 *chip, int port, + int regnum, u16 val); +}; + struct lan9303 { struct device *dev; struct regmap *regmap; @@ -11,9 +20,11 @@ struct lan9303 { bool phy_addr_sel_strap; struct dsa_switch *ds; struct mutex indirect_mutex; /* protect indexed register access */ + const struct lan9303_phy_ops *ops; }; extern const struct regmap_access_table lan9303_register_set; +extern const struct lan9303_phy_ops lan9303_indirect_phy_ops; int lan9303_probe(struct lan9303 *chip, struct device_node *np); int lan9303_remove(struct lan9303 *chip); diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index ab3ce0da5071..24ec20f7f444 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -63,6 +63,8 @@ static int lan9303_i2c_probe(struct i2c_client *client, i2c_set_clientdata(client, sw_dev); sw_dev->chip.dev = &client->dev; + sw_dev->chip.ops = &lan9303_indirect_phy_ops; + ret = lan9303_probe(&sw_dev->chip, client->dev.of_node); if (ret != 0) return ret; diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 93c36c0541cf..fc16668a487f 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -40,6 +40,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val) { struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; + reg <<= 2; /* reg num to offset */ mutex_lock(&sw_dev->device->bus->mdio_lock); lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff); lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff); @@ -57,6 +58,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) { struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; + reg <<= 2; /* reg num to offset */ mutex_lock(&sw_dev->device->bus->mdio_lock); *val = lan9303_mdio_real_read(sw_dev->device, reg); *val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16); @@ -65,6 +67,25 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) return 0; } +int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val) +{ + struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); + + return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val); +} + +int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) +{ + struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); + + return mdiobus_read_nested(sw_dev->device->bus, phy, reg); +} + +static const struct lan9303_phy_ops lan9303_mdio_phy_ops = { + .phy_read = lan9303_mdio_phy_read, + .phy_write = lan9303_mdio_phy_write, +}; + static const struct regmap_config lan9303_mdio_regmap_config = { .reg_bits = 8, .val_bits = 32, @@ -106,6 +127,8 @@ static int lan9303_mdio_probe(struct mdio_device *mdiodev) dev_set_drvdata(&mdiodev->dev, sw_dev); sw_dev->chip.dev = &mdiodev->dev; + sw_dev->chip.ops = &lan9303_mdio_phy_ops; + ret = lan9303_probe(&sw_dev->chip, mdiodev->dev.of_node); if (ret != 0) return ret; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5bcdd33101b0..647d5d45c1d6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -810,31 +810,40 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); } -static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) +static int mv88e6xxx_energy_detect_read(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) { - struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) + if (!chip->info->ops->phy_energy_detect_read) return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_phy_read(chip, port, 16, ®); + /* assign eee->eee_enabled and eee->tx_lpi_enabled */ + err = chip->info->ops->phy_energy_detect_read(chip, port, eee); if (err) - goto out; + return err; - e->eee_enabled = !!(reg & 0x0200); - e->tx_lpi_enabled = !!(reg & 0x0100); + /* assign eee->eee_active */ + return mv88e6xxx_port_status_eee(chip, port, eee); +} - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); - if (err) - goto out; +static int mv88e6xxx_energy_detect_write(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) +{ + if (!chip->info->ops->phy_energy_detect_write) + return -EOPNOTSUPP; - e->eee_active = !!(reg & MV88E6352_PORT_STS_EEE); -out: + return chip->info->ops->phy_energy_detect_write(chip, port, eee); +} + +static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_energy_detect_read(chip, port, e); mutex_unlock(&chip->reg_lock); return err; @@ -844,26 +853,10 @@ static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) - return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_phy_read(chip, port, 16, ®); - if (err) - goto out; - - reg &= ~0x0300; - if (e->eee_enabled) - reg |= 0x0200; - if (e->tx_lpi_enabled) - reg |= 0x0100; - - err = mv88e6xxx_phy_write(chip, port, 16, reg); -out: + err = mv88e6xxx_energy_detect_write(chip, port, e); mutex_unlock(&chip->reg_lock); return err; @@ -926,6 +919,22 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, dev_err(ds->dev, "p%d: failed to update state\n", port); } +static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip) +{ + if (chip->info->ops->pot_clear) + return chip->info->ops->pot_clear(chip); + + return 0; +} + +static int mv88e6xxx_rsvd2cpu_setup(struct mv88e6xxx_chip *chip) +{ + if (chip->info->ops->mgmt_rsvd2cpu) + return chip->info->ops->mgmt_rsvd2cpu(chip); + + return 0; +} + static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip) { int err; @@ -2116,7 +2125,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) goto unlock; /* Setup Switch Global 2 Registers */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + if (chip->info->global2_addr) { err = mv88e6xxx_g2_setup(chip); if (err) goto unlock; @@ -2142,16 +2151,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - /* Some generations have the configuration of sending reserved - * management frames to the CPU in global2, others in - * global1. Hence it does not fit the two setup functions - * above. - */ - if (chip->info->ops->mgmt_rsvd2cpu) { - err = chip->info->ops->mgmt_rsvd2cpu(chip); - if (err) - goto unlock; - } + err = mv88e6xxx_pot_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_rsvd2cpu_setup(chip); + if (err) + goto unlock; unlock: mutex_unlock(&chip->reg_lock); @@ -2236,7 +2242,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, if (np) { bus->name = np->full_name; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s", np->full_name); + snprintf(bus->id, MII_BUS_ID_SIZE, "%pOF", np); } else { bus->name = "mv88e6xxx SMI"; snprintf(bus->id, MII_BUS_ID_SIZE, "mv88e6xxx-%d", index++); @@ -2385,7 +2391,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2408,7 +2415,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2441,7 +2448,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2467,7 +2475,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2496,7 +2505,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2512,6 +2521,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2533,6 +2544,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2563,7 +2575,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2587,7 +2600,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2619,7 +2633,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2633,6 +2648,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2653,7 +2670,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2686,7 +2704,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2700,6 +2719,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2720,7 +2741,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2746,7 +2768,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2762,6 +2784,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2782,6 +2806,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2796,6 +2821,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2816,6 +2843,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2830,6 +2858,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2850,6 +2880,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2864,6 +2895,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2884,7 +2917,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2899,6 +2933,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2920,6 +2956,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2934,6 +2971,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, @@ -2952,20 +2991,23 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .stats_get_stats = mv88e6320_stats_get_stats, .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, }; static const struct mv88e6xxx_ops mv88e6321_ops = { - /* MV88E6XXX_FAMILY_6321 */ + /* MV88E6XXX_FAMILY_6320 */ .irl_init_all = mv88e6352_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, @@ -2997,6 +3039,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3018,6 +3062,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3049,7 +3094,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3081,7 +3127,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3095,6 +3142,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -3115,7 +3164,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3130,6 +3180,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3153,6 +3205,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -3167,6 +3220,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3190,6 +3245,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -3206,12 +3262,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6097, .ops = &mv88e6085_ops, }, @@ -3224,11 +3282,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6095, .ops = &mv88e6095_ops, }, @@ -3241,12 +3300,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6097, .ops = &mv88e6097_ops, }, @@ -3259,12 +3320,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6123_ops, }, @@ -3277,11 +3340,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6131_ops, }, @@ -3294,11 +3358,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .atu_move_port_mask = 0x1f, + .g2_irqs = 10, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6341, .ops = &mv88e6141_ops, }, @@ -3311,12 +3377,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6161_ops, }, @@ -3329,12 +3397,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6165_ops, }, @@ -3347,12 +3417,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6171_ops, }, @@ -3365,12 +3437,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6172_ops, }, @@ -3383,12 +3457,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6175_ops, }, @@ -3401,12 +3477,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6176_ops, }, @@ -3419,11 +3497,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6185_ops, }, @@ -3436,12 +3515,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .tag_protocol = DSA_TAG_PROTO_DSA, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .pvt = true, + .multi_chip = true, .atu_move_port_mask = 0x1f, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6190_ops, }, @@ -3454,12 +3535,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6190x_ops, }, @@ -3472,12 +3555,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6191_ops, }, @@ -3490,12 +3575,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6240_ops, }, @@ -3508,12 +3595,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6290_ops, }, @@ -3526,12 +3615,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6320_ops, }, @@ -3544,11 +3634,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6321_ops, }, @@ -3561,11 +3652,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .atu_move_port_mask = 0x1f, + .g2_irqs = 10, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6341, .ops = &mv88e6341_ops, }, @@ -3578,12 +3671,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6350_ops, }, @@ -3596,12 +3691,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6351_ops, }, @@ -3614,12 +3711,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6352_ops, }, [MV88E6390] = { @@ -3631,12 +3730,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6390_ops, }, [MV88E6390X] = { @@ -3648,12 +3749,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6390x_ops, }, }; @@ -3723,7 +3826,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, { if (sw_addr == 0) chip->smi_ops = &mv88e6xxx_smi_single_chip_ops; - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP)) + else if (chip->info->multi_chip) chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops; else return -EINVAL; @@ -3971,7 +4074,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) goto out; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) { + if (chip->info->g2_irqs > 0) { err = mv88e6xxx_g2_irq_setup(chip); if (err) goto out_g1_irq; @@ -3991,7 +4094,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) out_mdio: mv88e6xxx_mdios_unregister(chip); out_g2_irq: - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0) + if (chip->info->g2_irqs > 0 && chip->irq > 0) mv88e6xxx_g2_irq_free(chip); out_g1_irq: if (chip->irq > 0) { @@ -4013,7 +4116,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_mdios_unregister(chip); if (chip->irq > 0) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) + if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); mv88e6xxx_g1_irq_free(chip); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 086444016352..9111e1316250 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -97,133 +97,6 @@ enum mv88e6xxx_family { MV88E6XXX_FAMILY_6390, /* 6190 6190X 6191 6290 6390 6390X */ }; -enum mv88e6xxx_cap { - /* Energy Efficient Ethernet. - */ - MV88E6XXX_CAP_EEE, - - /* Multi-chip Addressing Mode. - * Some chips respond to only 2 registers of its own SMI device address - * when it is non-zero, and use indirect access to internal registers. - */ - MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */ - MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */ - - /* Switch Global (1) Registers. - */ - MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */ - MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */ - - /* Switch Global 2 Registers. - * The device contains a second set of global 16-bit registers. - */ - MV88E6XXX_CAP_GLOBAL2, - MV88E6XXX_CAP_G2_INT, /* (0x00) Interrupt Status */ - MV88E6XXX_CAP_G2_MGMT_EN_2X, /* (0x02) MGMT Enable Register 2x */ - MV88E6XXX_CAP_G2_MGMT_EN_0X, /* (0x03) MGMT Enable Register 0x */ - MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ - - /* Per VLAN Spanning Tree Unit (STU). - * The Port State database, if present, is accessed through VTU - * operations and dedicated SID registers. See MV88E6352_G1_VTU_SID. - */ - MV88E6XXX_CAP_STU, - - /* VLAN Table Unit. - * The VTU is used to program 802.1Q VLANs. See MV88E6XXX_G1_VTU_OP. - */ - MV88E6XXX_CAP_VTU, -}; - -/* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) - -#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) - -#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) - -#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_INT BIT_ULL(MV88E6XXX_CAP_G2_INT) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) - -/* Multi-chip Addressing Mode */ -#define MV88E6XXX_FLAGS_MULTI_CHIP \ - (MV88E6XXX_FLAG_SMI_CMD | \ - MV88E6XXX_FLAG_SMI_DATA) - -#define MV88E6XXX_FLAGS_FAMILY_6095 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6185 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6320 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6341 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6351 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6352 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6390 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - struct mv88e6xxx_ops; struct mv88e6xxx_info { @@ -235,11 +108,18 @@ struct mv88e6xxx_info { unsigned int max_vid; unsigned int port_base_addr; unsigned int global1_addr; + unsigned int global2_addr; unsigned int age_time_coeff; unsigned int g1_irqs; + unsigned int g2_irqs; bool pvt; + + /* Multi-chip Addressing Mode. + * Some chips respond to only 2 registers of its own SMI device address + * when it is non-zero, and use indirect access to internal registers. + */ + bool multi_chip; enum dsa_tag_protocol tag_protocol; - unsigned long long flags; /* Mask for FromPort and ToPort value of PortVec used in ATU Move * operation. 0 means that the ATU Move operation is not supported. @@ -359,6 +239,15 @@ struct mv88e6xxx_ops { struct mii_bus *bus, int addr, int reg, u16 val); + /* Copper Energy Detect operations */ + int (*phy_energy_detect_read)(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + int (*phy_energy_detect_write)(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + + /* Priority Override Table operations */ + int (*pot_clear)(struct mv88e6xxx_chip *chip); + /* PHY Polling Unit (PPU) operations */ int (*ppu_enable)(struct mv88e6xxx_chip *chip); int (*ppu_disable)(struct mv88e6xxx_chip *chip); @@ -449,7 +338,6 @@ struct mv88e6xxx_ops { int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port); const struct mv88e6xxx_irq_ops *watchdog_ops; - /* Can be either in g1 or g2, so don't use a prefix */ int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip); /* Power on/off a SERDES interface */ @@ -482,12 +370,6 @@ struct mv88e6xxx_hw_stat { int type; }; -static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, - unsigned long flags) -{ - return (chip->info->flags & flags) == flags; -} - static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip) { return chip->info->pvt; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 158d0f499874..16f556261022 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -22,48 +22,99 @@ static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) { - return mv88e6xxx_read(chip, MV88E6XXX_G2, reg, val); + return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val); } static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) { - return mv88e6xxx_write(chip, MV88E6XXX_G2, reg, val); + return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val); } static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) { - return mv88e6xxx_update(chip, MV88E6XXX_G2, reg, update); + return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update); } static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) { - return mv88e6xxx_wait(chip, MV88E6XXX_G2, reg, mask); + return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask); +} + +/* Offset 0x00: Interrupt Source Register */ + +static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src) +{ + /* Read (and clear most of) the Interrupt Source bits */ + return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SRC, src); +} + +/* Offset 0x01: Interrupt Mask Register */ + +static int mv88e6xxx_g2_int_mask(struct mv88e6xxx_chip *chip, u16 mask) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, mask); } /* Offset 0x02: Management Enable 2x */ + +static int mv88e6xxx_g2_mgmt_enable_2x(struct mv88e6xxx_chip *chip, u16 en2x) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, en2x); +} + /* Offset 0x03: Management Enable 0x */ -int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +static int mv88e6xxx_g2_mgmt_enable_0x(struct mv88e6xxx_chip *chip, u16 en0x) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, en0x); +} + +/* Offset 0x05: Switch Management Register */ + +static int mv88e6xxx_g2_switch_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip, + bool enable) +{ + u16 val; + int err; + + err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SWITCH_MGMT, &val); + if (err) + return err; + + if (enable) + val |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU; + else + val &= ~MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU; + + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, val); +} + +int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) { int err; /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:2x as MGMT. + * addresses matching 01:80:c2:00:00:0x as MGMT. */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { - err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, 0xffff); - if (err) - return err; - } + err = mv88e6xxx_g2_mgmt_enable_0x(chip, 0xffff); + if (err) + return err; + + return mv88e6xxx_g2_switch_mgmt_rsvd2cpu(chip, true); +} + +int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + int err; /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:0x as MGMT. + * addresses matching 01:80:c2:00:00:2x as MGMT. */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) - return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, - 0xffff); + err = mv88e6xxx_g2_mgmt_enable_2x(chip, 0xffff); + if (err) + return err; - return 0; + return mv88e6185_g2_mgmt_rsvd2cpu(chip); } /* Offset 0x06: Device Mapping Table register */ @@ -260,7 +311,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val); } -static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) { int i, err; @@ -933,7 +984,7 @@ static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id) u16 reg; mutex_lock(&chip->reg_lock); - err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SOURCE, ®); + err = mv88e6xxx_g2_int_source(chip, ®); mutex_unlock(&chip->reg_lock); if (err) goto out; @@ -959,8 +1010,11 @@ static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d) static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d) { struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + int err; - mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, ~chip->g2_irq.masked); + err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked); + if (err) + dev_err(chip->dev, "failed to mask interrupts\n"); mutex_unlock(&chip->reg_lock); } @@ -1063,9 +1117,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) * port at the highest priority. */ reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || - mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) - reg |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU | 0x7; err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg); if (err) return err; @@ -1080,12 +1131,5 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (err) return err; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { - /* Clear the priority override table. */ - err = mv88e6xxx_g2_clear_pot(chip); - if (err) - return err; - } - return 0; } diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 317ffd8f323d..669f59017b12 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -17,14 +17,27 @@ #include "chip.h" -#define MV88E6XXX_G2 0x1c - /* Offset 0x00: Interrupt Source Register */ -#define MV88E6XXX_G2_INT_SOURCE 0x00 +#define MV88E6XXX_G2_INT_SRC 0x00 +#define MV88E6XXX_G2_INT_SRC_WDOG 0x8000 +#define MV88E6XXX_G2_INT_SRC_JAM_LIMIT 0x4000 +#define MV88E6XXX_G2_INT_SRC_DUPLEX_MISMATCH 0x2000 +#define MV88E6XXX_G2_INT_SRC_WAKE_EVENT 0x1000 +#define MV88E6352_G2_INT_SRC_SERDES 0x0800 +#define MV88E6352_G2_INT_SRC_PHY 0x001f +#define MV88E6390_G2_INT_SRC_PHY 0x07fe + #define MV88E6XXX_G2_INT_SOURCE_WATCHDOG 15 /* Offset 0x01: Interrupt Mask Register */ -#define MV88E6XXX_G2_INT_MASK 0x01 +#define MV88E6XXX_G2_INT_MASK 0x01 +#define MV88E6XXX_G2_INT_MASK_WDOG 0x8000 +#define MV88E6XXX_G2_INT_MASK_JAM_LIMIT 0x4000 +#define MV88E6XXX_G2_INT_MASK_DUPLEX_MISMATCH 0x2000 +#define MV88E6XXX_G2_INT_MASK_WAKE_EVENT 0x1000 +#define MV88E6352_G2_INT_MASK_SERDES 0x0800 +#define MV88E6352_G2_INT_MASK_PHY 0x001f +#define MV88E6390_G2_INT_MASK_PHY 0x07fe /* Offset 0x02: MGMT Enable Register 2x */ #define MV88E6XXX_G2_MGMT_EN_2X 0x02 @@ -245,7 +258,11 @@ int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip); -int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); + +int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); +int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); + +int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; @@ -254,7 +271,7 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + if (chip->info->global2_addr) { dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); return -EOPNOTSUPP; } @@ -347,7 +364,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) { } -static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) { return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 3500ac0ea848..317ae89cfa68 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -13,7 +13,6 @@ #include <linux/mdio.h> #include <linux/module.h> -#include <net/dsa.h> #include "chip.h" #include "phy.h" @@ -247,3 +246,99 @@ int mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip) { return mv88e6xxx_phy_ppu_enable(chip); } + +/* Page 0, Register 16: Copper Specific Control Register 1 */ + +int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK; + + eee->eee_enabled = false; + eee->tx_lpi_enabled = false; + + switch (val) { + case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP: + eee->tx_lpi_enabled = true; + /* fall through... */ + case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV: + eee->eee_enabled = true; + } + + return 0; +} + +int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= ~MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK; + + if (eee->eee_enabled) + val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV; + if (eee->tx_lpi_enabled) + val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP; + + return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val); +} + +int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK; + + eee->eee_enabled = false; + eee->tx_lpi_enabled = false; + + switch (val) { + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO: + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW: + eee->tx_lpi_enabled = true; + /* fall through... */ + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO: + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW: + eee->eee_enabled = true; + } + + return 0; +} + +int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= ~MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK; + + if (eee->eee_enabled) + val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO; + if (eee->tx_lpi_enabled) + val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO; + + return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val); +} diff --git a/drivers/net/dsa/mv88e6xxx/phy.h b/drivers/net/dsa/mv88e6xxx/phy.h index 556b74a0502a..988802799ad6 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.h +++ b/drivers/net/dsa/mv88e6xxx/phy.h @@ -17,6 +17,19 @@ #define MV88E6XXX_PHY_PAGE 0x16 #define MV88E6XXX_PHY_PAGE_COPPER 0x00 +/* Page 0, Register 16: Copper Specific Control Register 1 */ +#define MV88E6XXX_PHY_CSCTL1 16 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK 0x0300 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK 0x0100 /* 0x */ +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV 0x0200 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP 0x0300 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK 0x0380 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK 0x0180 /* 0xx */ +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO 0x0200 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW 0x0280 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO 0x0300 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW 0x0380 + /* PHY Registers accesses implementations */ int mv88e6165_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int addr, int reg, u16 *val); @@ -40,4 +53,13 @@ void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip); void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip); int mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip); +int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + #endif /*_MV88E6XXX_PHY_H */ diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index a7801f6668a5..2837a9128557 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -35,6 +35,23 @@ int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, return mv88e6xxx_write(chip, addr, reg, val); } +/* Offset 0x00: Port Status Register */ + +int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &val); + if (err) + return err; + + eee->eee_active = !!(val & MV88E6352_PORT_STS_EEE); + + return 0; +} + /* Offset 0x01: MAC (or PCS or Physical) Control Register * * Link, Duplex and Flow Control have one force bit, one value bit. diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 8f3991bf1851..6fcab309cd85 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -216,9 +216,6 @@ /* Offset 0x13: OutFiltered Counter */ #define MV88E6XXX_PORT_OUT_FILTERED 0x13 -/* Offset 0x16: LED Control */ -#define MV88E6XXX_PORT_LED_CONTROL 0x16 - /* Offset 0x18: IEEE Priority Mapping Table */ #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE 0x18 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE_UPDATE 0x8000 @@ -244,6 +241,9 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, u16 val); +int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee); + int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, phy_interface_t mode); int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d0c165d2086e..d0a1f9ce3168 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -345,7 +345,7 @@ static void dummy_setup(struct net_device *dev) dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; - dev->features |= NETIF_F_ALL_TSO | NETIF_F_UFO; + dev->features |= NETIF_F_ALL_TSO; dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 0938294f640a..e9282c924621 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -129,6 +129,7 @@ #include <net/dcbnl.h> #include <linux/completion.h> #include <linux/cpumask.h> +#include <linux/interrupt.h> #define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_VERSION "1.0.3" diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 96dd5300e0e5..e58b157b7d7c 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -114,8 +114,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) int j, rev, rc = -EBUSY; if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) { - printk(KERN_ERR "can't use MACE %s: need 3 addrs and 3 irqs\n", - mace->full_name); + printk(KERN_ERR "can't use MACE %pOF: need 3 addrs and 3 irqs\n", + mace); return -ENODEV; } @@ -123,8 +123,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) if (addr == NULL) { addr = of_get_property(mace, "local-mac-address", NULL); if (addr == NULL) { - printk(KERN_ERR "Can't get mac-address for MACE %s\n", - mace->full_name); + printk(KERN_ERR "Can't get mac-address for MACE %pOF\n", + mace); return -ENODEV; } } diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 96413808c726..45775399cab6 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -61,10 +61,12 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" + depends on (OF && HAS_IOMEM) || COMPILE_TEST select MII select PHYLIB select FIXED_PHY select BCM7XXX_PHY + select MDIO_BCM_UNIMAC help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. @@ -193,6 +195,7 @@ config SYSTEMPORT config BNXT tristate "Broadcom NetXtreme-C/E support" depends on PCI + depends on MAY_USE_DEVLINK select FW_LOADER select LIBCRC32C ---help--- diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index a7ca45b251cb..d141a22ac50b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o +bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e7c8539cbddf..156fb374522b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -33,6 +33,7 @@ #include <linux/mii.h> #include <linux/if.h> #include <linux/if_vlan.h> +#include <linux/if_bridge.h> #include <linux/rtc.h> #include <linux/bpf.h> #include <net/ip.h> @@ -56,6 +57,7 @@ #include "bnxt_ethtool.h" #include "bnxt_dcb.h" #include "bnxt_xdp.h" +#include "bnxt_vfr.h" #define BNXT_TX_TIMEOUT (5 * HZ) @@ -243,6 +245,16 @@ const u16 bnxt_lhint_arr[] = { TX_BD_FLAGS_LHINT_2048_AND_LARGER, }; +static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX) + return 0; + + return md_dst->u.port_info.port_id; +} + static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -287,7 +299,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_buf->nr_frags = last_frag; vlan_tag_flags = 0; - cfa_action = 0; + cfa_action = bnxt_xmit_get_cfa_action(skb); if (skb_vlan_tag_present(skb)) { vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN | skb_vlan_tag_get(skb); @@ -322,7 +334,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_push1->tx_bd_hsize_lflags = 0; tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); - tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action); + tx_push1->tx_bd_cfa_action = + cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); end = pdata + length; end = PTR_ALIGN(end, 8) - 1; @@ -427,7 +440,8 @@ normal_tx: txbd->tx_bd_len_flags_type = cpu_to_le32(flags); txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); - txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action); + txbd1->tx_bd_cfa_action = + cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); for (i = 0; i < last_frag; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1032,7 +1046,10 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, bnxt_sched_reset(bp, rxr); return; } - + /* Store cfa_code in tpa_info to use in tpa_end + * completion processing. + */ + tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1); prod_rx_buf->data = tpa_info->data; prod_rx_buf->data_ptr = tpa_info->data_ptr; @@ -1267,6 +1284,17 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp, return skb; } +/* Given the cfa_code of a received packet determine which + * netdev (vf-rep or PF) the packet is destined to. + */ +static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code) +{ + struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code); + + /* if vf-rep dev is NULL, the must belongs to the PF */ + return dev ? dev : bp->dev; +} + static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, @@ -1360,7 +1388,9 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } } - skb->protocol = eth_type_trans(skb, bp->dev); + + skb->protocol = + eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code)); if (tpa_info->hash_type != PKT_HASH_TYPE_NONE) skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type); @@ -1387,6 +1417,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return skb; } +static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, + struct sk_buff *skb) +{ + if (skb->dev != bp->dev) { + /* this packet belongs to a vf-rep */ + bnxt_vf_rep_rx(bp, skb); + return; + } + skb_record_rx_queue(skb, bnapi->index); + napi_gro_receive(&bnapi->napi, skb); +} + /* returns the following: * 1 - 1 packet successfully received * 0 - successful TPA_START, packet not completed yet @@ -1403,7 +1445,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, struct rx_cmp *rxcmp; struct rx_cmp_ext *rxcmp1; u32 tmp_raw_cons = *raw_cons; - u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); + u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons); struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; @@ -1445,8 +1487,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, rc = -ENOMEM; if (likely(skb)) { - skb_record_rx_queue(skb, bnapi->index); - napi_gro_receive(&bnapi->napi, skb); + bnxt_deliver_skb(bp, bnapi, skb); rc = 1; } *event |= BNXT_RX_EVENT; @@ -1535,7 +1576,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type); } - skb->protocol = eth_type_trans(skb, dev); + cfa_code = RX_CMP_CFA_CODE(rxcmp1); + skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code)); if ((rxcmp1->rx_cmp_flags2 & cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && @@ -1560,8 +1602,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, } } - skb_record_rx_queue(skb, bnapi->index); - napi_gro_receive(&bnapi->napi, skb); + bnxt_deliver_skb(bp, bnapi, skb); rc = 1; next_rx: @@ -4577,6 +4618,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) { struct hwrm_func_qcfg_input req = {0}; struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + u16 flags; int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1); @@ -4593,15 +4635,15 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; } #endif - if (BNXT_PF(bp)) { - u16 flags = le16_to_cpu(resp->flags); - - if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED | - FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) - bp->flags |= BNXT_FLAG_FW_LLDP_AGENT; - if (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST) - bp->flags |= BNXT_FLAG_MULTI_HOST; + flags = le16_to_cpu(resp->flags); + if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED | + FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) { + bp->flags |= BNXT_FLAG_FW_LLDP_AGENT; + if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED) + bp->flags |= BNXT_FLAG_FW_DCBX_AGENT; } + if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)) + bp->flags |= BNXT_FLAG_MULTI_HOST; switch (resp->port_partition_type) { case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: @@ -4610,6 +4652,13 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) bp->port_partition_type = resp->port_partition_type; break; } + if (bp->hwrm_spec_code < 0x10707 || + resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEB) + bp->br_mode = BRIDGE_MODE_VEB; + else if (resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEPA) + bp->br_mode = BRIDGE_MODE_VEPA; + else + bp->br_mode = BRIDGE_MODE_UNDEF; func_qcfg_exit: mutex_unlock(&bp->hwrm_cmd_lock); @@ -4911,6 +4960,26 @@ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, } } +static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode) +{ + struct hwrm_func_cfg_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); + req.fid = cpu_to_le16(0xffff); + req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE); + if (br_mode == BRIDGE_MODE_VEB) + req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB; + else if (br_mode == BRIDGE_MODE_VEPA) + req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA; + else + return -EINVAL; + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + rc = -EIO; + return rc; +} + static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) { struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; @@ -5559,12 +5628,10 @@ void bnxt_tx_disable(struct bnxt *bp) { int i; struct bnxt_tx_ring_info *txr; - struct netdev_queue *txq; if (bp->tx_ring) { for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txq = netdev_get_tx_queue(bp->dev, i); txr->dev_state = BNXT_DEV_STATE_CLOSING; } } @@ -5577,11 +5644,9 @@ void bnxt_tx_enable(struct bnxt *bp) { int i; struct bnxt_tx_ring_info *txr; - struct netdev_queue *txq; for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txq = netdev_get_tx_queue(bp->dev, i); txr->dev_state = 0; } netif_tx_wake_all_queues(bp->dev); @@ -5646,7 +5711,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) if (rc) goto hwrm_phy_qcaps_exit; - if (resp->eee_supported & PORT_PHY_QCAPS_RESP_EEE_SUPPORTED) { + if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) { struct ethtool_eee *eee = &bp->eee; u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode); @@ -5686,13 +5751,15 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) memcpy(&link_info->phy_qcfg_resp, resp, sizeof(*resp)); link_info->phy_link_status = resp->link; - link_info->duplex = resp->duplex; + link_info->duplex = resp->duplex_cfg; + if (bp->hwrm_spec_code >= 0x10800) + link_info->duplex = resp->duplex_state; link_info->pause = resp->pause; link_info->auto_mode = resp->auto_mode; link_info->auto_pause_setting = resp->auto_pause; link_info->lp_pause = resp->link_partner_adv_pause; link_info->force_pause_setting = resp->force_pause; - link_info->duplex_setting = resp->duplex; + link_info->duplex_setting = resp->duplex_cfg; if (link_info->phy_link_status == BNXT_LINK_LINK) link_info->link_speed = le16_to_cpu(resp->link_speed); else @@ -6214,6 +6281,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* Poll link status and check for SFP+ module status */ bnxt_get_port_module_status(bp); + /* VF-reps may need to be re-opened after the PF is re-opened */ + if (BNXT_PF(bp)) + bnxt_vf_reps_open(bp); return 0; open_err: @@ -6302,6 +6372,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n"); } + + /* Close the VF-reps before closing PF */ + if (BNXT_PF(bp)) + bnxt_vf_reps_close(bp); #endif /* Change device state to avoid TX queue wake up's */ bnxt_tx_disable(bp); @@ -6813,7 +6887,8 @@ static void bnxt_timer(unsigned long data) if (atomic_read(&bp->intr_sem) != 0) goto bnxt_restart_timer; - if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) { + if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) && + bp->stats_coal_ticks) { set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); schedule_work(&bp->sp_task); } @@ -7422,6 +7497,102 @@ static void bnxt_udp_tunnel_del(struct net_device *dev, schedule_work(&bp->sp_task); } +static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct bnxt *bp = netdev_priv(dev); + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bp->br_mode, 0, 0, + nlflags, filter_mask, NULL); +} + +static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags) +{ + struct bnxt *bp = netdev_priv(dev); + struct nlattr *attr, *br_spec; + int rem, rc = 0; + + if (bp->hwrm_spec_code < 0x10708 || !BNXT_SINGLE_PF(bp)) + return -EOPNOTSUPP; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + u16 mode; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + mode = nla_get_u16(attr); + if (mode == bp->br_mode) + break; + + rc = bnxt_hwrm_set_br_mode(bp, mode); + if (!rc) + bp->br_mode = mode; + break; + } + return rc; +} + +static int bnxt_get_phys_port_name(struct net_device *dev, char *buf, + size_t len) +{ + struct bnxt *bp = netdev_priv(dev); + int rc; + + /* The PF and it's VF-reps only support the switchdev framework */ + if (!BNXT_PF(bp)) + return -EOPNOTSUPP; + + rc = snprintf(buf, len, "p%d", bp->pf.port_id); + + if (rc >= len) + return -EOPNOTSUPP; + return 0; +} + +int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr) +{ + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return -EOPNOTSUPP; + + /* The PF and it's VF-reps only support the switchdev framework */ + if (!BNXT_PF(bp)) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + /* In SRIOV each PF-pool (PF + child VFs) serves as a + * switching domain, the PF's perm mac-addr can be used + * as the unique parent-id + */ + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, bp->pf.mac_addr); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int bnxt_swdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + return bnxt_port_attr_get(netdev_priv(dev), attr); +} + +static const struct switchdev_ops bnxt_switchdev_ops = { + .switchdev_port_attr_get = bnxt_swdev_port_attr_get +}; + static const struct net_device_ops bnxt_netdev_ops = { .ndo_open = bnxt_open, .ndo_start_xmit = bnxt_start_xmit, @@ -7453,6 +7624,9 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, .ndo_xdp = bnxt_xdp, + .ndo_bridge_getlink = bnxt_bridge_getlink, + .ndo_bridge_setlink = bnxt_bridge_setlink, + .ndo_get_phys_port_name = bnxt_get_phys_port_name }; static void bnxt_remove_one(struct pci_dev *pdev) @@ -7460,8 +7634,10 @@ static void bnxt_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(dev); - if (BNXT_PF(bp)) + if (BNXT_PF(bp)) { bnxt_sriov_disable(bp); + bnxt_dl_unregister(bp); + } pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); @@ -7710,6 +7886,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->netdev_ops = &bnxt_netdev_ops; dev->watchdog_timeo = BNXT_TX_TIMEOUT; dev->ethtool_ops = &bnxt_ethtool_ops; + SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops); pci_set_drvdata(pdev, dev); rc = bnxt_alloc_hwrm_resources(bp); @@ -7764,6 +7941,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); + mutex_init(&bp->sriov_lock); #endif bp->gro_func = bnxt_gro_func_5730x; if (BNXT_CHIP_P4_PLUS(bp)) @@ -7855,6 +8033,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_clr_int; + if (BNXT_PF(bp)) + bnxt_dl_register(bp); + netdev_info(dev, "%s found at mem %lx, node addr %pM\n", board_info[ent->driver_data].name, (long)pci_resource_start(pdev, 0), dev->dev_addr); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index f34691f85602..2d84d5719b70 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -12,13 +12,16 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.7.0" +#define DRV_MODULE_VERSION "1.8.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 7 +#define DRV_VER_MIN 8 #define DRV_VER_UPD 0 #include <linux/interrupt.h> +#include <net/devlink.h> +#include <net/dst_metadata.h> +#include <net/switchdev.h> struct tx_bd { __le32 tx_bd_len_flags_type; @@ -242,6 +245,10 @@ struct rx_cmp_ext { ((le32_to_cpu((rxcmp1)->rx_cmp_flags2) & \ RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3) +#define RX_CMP_CFA_CODE(rxcmpl1) \ + ((le32_to_cpu((rxcmpl1)->rx_cmp_cfa_code_errors_v2) & \ + RX_CMPL_CFA_CODE_MASK) >> RX_CMPL_CFA_CODE_SFT) + struct rx_agg_cmp { __le32 rx_agg_cmp_len_flags_type; #define RX_AGG_CMP_TYPE (0x3f << 0) @@ -311,6 +318,10 @@ struct rx_tpa_start_cmp_ext { __le32 rx_tpa_start_cmp_hdr_info; }; +#define TPA_START_CFA_CODE(rx_tpa_start) \ + ((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) & \ + RX_TPA_START_CMP_CFA_CODE) >> RX_TPA_START_CMPL_CFA_CODE_SHIFT) + struct rx_tpa_end_cmp { __le32 rx_tpa_end_cmp_len_flags_type; #define RX_TPA_END_CMP_TYPE (0x3f << 0) @@ -618,6 +629,8 @@ struct bnxt_tpa_info { #define BNXT_TPA_OUTER_L3_OFF(hdr_info) \ ((hdr_info) & 0x1ff) + + u16 cfa_code; /* cfa_code in TPA start compl */ }; struct bnxt_rx_ring_info { @@ -825,8 +838,8 @@ struct bnxt_link_info { u8 loop_back; u8 link_up; u8 duplex; -#define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_HALF -#define BNXT_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_FULL +#define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF +#define BNXT_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL u8 pause; #define BNXT_LINK_PAUSE_TX PORT_PHY_QCFG_RESP_PAUSE_TX #define BNXT_LINK_PAUSE_RX PORT_PHY_QCFG_RESP_PAUSE_RX @@ -928,6 +941,24 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +struct bnxt_vf_rep_stats { + u64 packets; + u64 bytes; + u64 dropped; +}; + +struct bnxt_vf_rep { + struct bnxt *bp; + struct net_device *dev; + struct metadata_dst *dst; + u16 vf_idx; + u16 tx_cfa_action; + u16 rx_cfa_code; + + struct bnxt_vf_rep_stats rx_stats; + struct bnxt_vf_rep_stats tx_stats; +}; + struct bnxt { void __iomem *bar0; void __iomem *bar1; @@ -1027,6 +1058,7 @@ struct bnxt { #define BNXT_FLAG_MULTI_HOST 0x100000 #define BNXT_FLAG_SHORT_CMD 0x200000 #define BNXT_FLAG_DOUBLE_DB 0x400000 + #define BNXT_FLAG_FW_DCBX_AGENT 0x800000 #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ @@ -1164,6 +1196,7 @@ struct bnxt { u8 nge_port_cnt; __le16 nge_fw_dst_port_id; u8 port_partition_type; + u16 br_mode; u16 rx_coal_ticks; u16 rx_coal_ticks_irq; @@ -1206,6 +1239,12 @@ struct bnxt { wait_queue_head_t sriov_cfg_wait; bool sriov_cfg; #define BNXT_SRIOV_CFG_WAIT_TMO msecs_to_jiffies(10000) + + /* lock to protect VF-rep creation/cleanup via + * multiple paths such as ->sriov_configure() and + * devlink ->eswitch_mode_set() + */ + struct mutex sriov_lock; #endif #define BNXT_NTP_FLTR_MAX_FLTR 4096 @@ -1232,6 +1271,12 @@ struct bnxt { struct bnxt_led_info leds[BNXT_MAX_LED]; struct bpf_prog *xdp_prog; + + /* devlink interface and vf-rep structs */ + struct devlink *dl; + enum devlink_eswitch_mode eswitch_mode; + struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */ + u16 *cfa_code_map; /* cfa_code -> vf_idx map */ }; #define BNXT_RX_STATS_OFFSET(counter) \ @@ -1306,4 +1351,5 @@ int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); void bnxt_restore_pf_fw_resources(struct bnxt *bp); +int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr); #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 5c6dd0ce209f..aa1f3a2c7a78 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -93,6 +93,12 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets, cos2bw.tsa = QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS; cos2bw.bw_weight = ets->tc_tx_bw[i]; + /* older firmware requires min_bw to be set to the + * same weight value in percent. + */ + cos2bw.min_bw = + cpu_to_le32((ets->tc_tx_bw[i] * 100) | + BW_VALUE_UNIT_PERCENT1_100); } memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4); if (i == 0) { @@ -549,13 +555,18 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode) { struct bnxt *bp = netdev_priv(dev); - /* only support IEEE */ - if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE)) + /* All firmware DCBX settings are set in NVRAM */ + if (bp->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) return 1; if (mode & DCB_CAP_DCBX_HOST) { if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) return 1; + + /* only support IEEE */ + if ((mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_VER_IEEE)) + return 1; } if (mode == bp->dcbx_cap) @@ -584,7 +595,7 @@ void bnxt_dcb_init(struct bnxt *bp) bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE; if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) bp->dcbx_cap |= DCB_CAP_DCBX_HOST; - else + else if (bp->flags & BNXT_FLAG_FW_DCBX_AGENT) bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED; bp->dev->dcbnl_ops = &dcbnl_ops; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h index ecd0a5e46a49..d2e0af960bf5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h @@ -26,6 +26,7 @@ struct bnxt_cos2bw_cfg { u8 queue_id; __le32 min_bw; __le32 max_bw; +#define BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) u8 tsa; u8 pri_lvl; u8 bw_weight; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index be6acadcb202..08b870d7d466 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -86,9 +86,11 @@ static int bnxt_set_coalesce(struct net_device *dev, if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) { u32 stats_ticks = coal->stats_block_coalesce_usecs; - stats_ticks = clamp_t(u32, stats_ticks, - BNXT_MIN_STATS_COAL_TICKS, - BNXT_MAX_STATS_COAL_TICKS); + /* Allow 0, which means disable. */ + if (stats_ticks) + stats_ticks = clamp_t(u32, stats_ticks, + BNXT_MIN_STATS_COAL_TICKS, + BNXT_MAX_STATS_COAL_TICKS); stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS); bp->stats_coal_ticks = stats_ticks; update_stats = true; @@ -198,19 +200,23 @@ static const struct { #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr) +static int bnxt_get_num_stats(struct bnxt *bp) +{ + int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings; + + if (bp->flags & BNXT_FLAG_PORT_STATS) + num_stats += BNXT_NUM_PORT_STATS; + + return num_stats; +} + static int bnxt_get_sset_count(struct net_device *dev, int sset) { struct bnxt *bp = netdev_priv(dev); switch (sset) { - case ETH_SS_STATS: { - int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings; - - if (bp->flags & BNXT_FLAG_PORT_STATS) - num_stats += BNXT_NUM_PORT_STATS; - - return num_stats; - } + case ETH_SS_STATS: + return bnxt_get_num_stats(bp); case ETH_SS_TEST: if (!bp->num_tests) return -EOPNOTSUPP; @@ -225,11 +231,8 @@ static void bnxt_get_ethtool_stats(struct net_device *dev, { u32 i, j = 0; struct bnxt *bp = netdev_priv(dev); - u32 buf_size = sizeof(struct ctx_hw_stats) * bp->cp_nr_rings; u32 stat_fields = sizeof(struct ctx_hw_stats) / 8; - memset(buf, 0, buf_size); - if (!bp->bnapi) return; @@ -520,7 +523,7 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd) struct flow_keys *fkeys; int i, rc = -EINVAL; - if (fs->location < 0 || fs->location >= BNXT_NTP_FLTR_MAX_FLTR) + if (fs->location >= BNXT_NTP_FLTR_MAX_FLTR) return rc; for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { @@ -835,7 +838,7 @@ static void bnxt_get_drvinfo(struct net_device *dev, strlcpy(info->fw_version, bp->fw_ver_str, sizeof(info->fw_version)); strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); - info->n_stats = BNXT_NUM_STATS * bp->cp_nr_rings; + info->n_stats = bnxt_get_num_stats(bp); info->testinfo_len = bp->num_tests; /* TODO CHIMP_FW: eeprom dump details */ info->eedump_len = 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 7dc71bb95837..3ba22e8ee914 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -11,14 +11,14 @@ #ifndef BNXT_HSI_H #define BNXT_HSI_H -/* HSI and HWRM Specification 1.7.6 */ +/* HSI and HWRM Specification 1.8.0 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 7 -#define HWRM_VERSION_UPDATE 6 +#define HWRM_VERSION_MINOR 8 +#define HWRM_VERSION_UPDATE 0 -#define HWRM_VERSION_RSVD 2 /* non-zero means beta version */ +#define HWRM_VERSION_RSVD 0 /* non-zero means beta version */ -#define HWRM_VERSION_STR "1.7.6.2" +#define HWRM_VERSION_STR "1.8.0.0" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. @@ -813,7 +813,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL #define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED 0x8UL #define FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED 0x10UL - #define FUNC_QCFG_RESP_FLAGS_MULTI_HOST 0x20UL + #define FUNC_QCFG_RESP_FLAGS_MULTI_HOST 0x20UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -835,9 +835,8 @@ struct hwrm_func_qcfg_output { u8 port_pf_cnt; #define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL 0x0UL __le16 dflt_vnic_id; - u8 host_cnt; - #define FUNC_QCFG_RESP_HOST_CNT_UNAVAIL 0x0UL u8 unused_0; + u8 unused_1; __le32 min_bw; #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 @@ -874,12 +873,56 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL - u8 unused_1; + u8 unused_2; __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; __le16 alloc_sp_tx_rings; + u8 unused_3; + u8 valid; +}; + +/* hwrm_func_vlan_cfg */ +/* Input (48 bytes) */ +struct hwrm_func_vlan_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 fid; + u8 unused_0; + u8 unused_1; + __le32 enables; + #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_VID 0x1UL + #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_VID 0x2UL + #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_PCP 0x4UL + #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_PCP 0x8UL + #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_TPID 0x10UL + #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_TPID 0x20UL + __le16 stag_vid; + u8 stag_pcp; u8 unused_2; + __be16 stag_tpid; + __le16 ctag_vid; + u8 ctag_pcp; + u8 unused_3; + __be16 ctag_tpid; + __le32 rsvd1; + __le32 rsvd2; + __le32 unused_4; +}; + +/* Output (16 bytes) */ +struct hwrm_func_vlan_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; u8 valid; }; @@ -902,6 +945,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_ENABLE 0x200UL #define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_DISABLE 0x400UL #define FUNC_CFG_REQ_FLAGS_VIRT_MAC_PERSIST 0x800UL + #define FUNC_CFG_REQ_FLAGS_NO_AUTOCLEAR_STATISTIC 0x1000UL __le32 enables; #define FUNC_CFG_REQ_ENABLES_MTU 0x1UL #define FUNC_CFG_REQ_ENABLES_MRU 0x2UL @@ -1456,9 +1500,9 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL - u8 duplex; - #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL - #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL + u8 duplex_cfg; + #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL 0x1UL u8 pause; #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL @@ -1573,6 +1617,9 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASELR4 0x16UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASEER4 0x17UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_ACTIVE_CABLE 0x18UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASET 0x19UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASESX 0x1aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX 0x1bUL u8 media_type; #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL @@ -1651,14 +1698,16 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL + u8 duplex_state; + #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL 0x1UL u8 unused_1; - u8 unused_2; char phy_vendor_name[16]; char phy_vendor_partnumber[16]; - __le32 unused_3; + __le32 unused_2; + u8 unused_3; u8 unused_4; u8 unused_5; - u8 unused_6; u8 valid; }; @@ -1744,6 +1793,51 @@ struct hwrm_port_mac_cfg_output { u8 valid; }; +/* hwrm_port_mac_ptp_qcfg */ +/* Input (24 bytes) */ +struct hwrm_port_mac_ptp_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 port_id; + __le16 unused_0[3]; +}; + +/* Output (80 bytes) */ +struct hwrm_port_mac_ptp_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS 0x1UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x2UL + u8 unused_0; + __le16 unused_1; + __le32 rx_ts_reg_off_lower; + __le32 rx_ts_reg_off_upper; + __le32 rx_ts_reg_off_seq_id; + __le32 rx_ts_reg_off_src_id_0; + __le32 rx_ts_reg_off_src_id_1; + __le32 rx_ts_reg_off_src_id_2; + __le32 rx_ts_reg_off_domain_id; + __le32 rx_ts_reg_off_fifo; + __le32 rx_ts_reg_off_fifo_adv; + __le32 rx_ts_reg_off_granularity; + __le32 tx_ts_reg_off_lower; + __le32 tx_ts_reg_off_upper; + __le32 tx_ts_reg_off_seq_id; + __le32 tx_ts_reg_off_fifo; + __le32 tx_ts_reg_off_granularity; + __le32 unused_2; + u8 unused_3; + u8 unused_4; + u8 unused_5; + u8 valid; +}; + /* hwrm_port_qstats */ /* Input (40 bytes) */ struct hwrm_port_qstats_input { @@ -1874,10 +1968,10 @@ struct hwrm_port_phy_qcaps_output { __le16 req_type; __le16 seq_id; __le16 resp_len; - u8 eee_supported; - #define PORT_PHY_QCAPS_RESP_EEE_SUPPORTED 0x1UL - #define PORT_PHY_QCAPS_RESP_RSVD1_MASK 0xfeUL - #define PORT_PHY_QCAPS_RESP_RSVD1_SFT 1 + u8 flags; + #define PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xfeUL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 1 u8 unused_0; __le16 supported_speeds_force_mode; #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL @@ -3152,6 +3246,95 @@ struct hwrm_queue_cos2bw_cfg_output { u8 valid; }; +/* hwrm_queue_dscp_qcaps */ +/* Input (24 bytes) */ +struct hwrm_queue_dscp_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 port_id; + u8 unused_0[7]; +}; + +/* Output (16 bytes) */ +struct hwrm_queue_dscp_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 num_dscp_bits; + u8 unused_0; + __le16 max_entries; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* hwrm_queue_dscp2pri_qcfg */ +/* Input (32 bytes) */ +struct hwrm_queue_dscp2pri_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + u8 port_id; + u8 unused_0; + __le16 dest_data_buffer_size; + __le32 unused_1; +}; + +/* Output (16 bytes) */ +struct hwrm_queue_dscp2pri_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 entry_cnt; + u8 default_pri; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* hwrm_queue_dscp2pri_cfg */ +/* Input (40 bytes) */ +struct hwrm_queue_dscp2pri_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le32 flags; + #define QUEUE_DSCP2PRI_CFG_REQ_FLAGS_USE_HW_DEFAULT_PRI 0x1UL + __le32 enables; + #define QUEUE_DSCP2PRI_CFG_REQ_ENABLES_DEFAULT_PRI 0x1UL + u8 port_id; + u8 default_pri; + __le16 entry_cnt; + __le32 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_queue_dscp2pri_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_vnic_alloc */ /* Input (24 bytes) */ struct hwrm_vnic_alloc_input { @@ -4038,7 +4221,7 @@ struct hwrm_cfa_encap_record_alloc_input { #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL u8 unused_0; __le16 unused_1; - __le32 encap_data[16]; + __le32 encap_data[20]; }; /* Output (16 bytes) */ @@ -4120,8 +4303,8 @@ struct hwrm_cfa_ntuple_filter_alloc_input { #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL u8 ip_protocol; #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL __le16 dst_id; __le16 mirror_vnic_id; u8 tunnel_type; @@ -4224,6 +4407,58 @@ struct hwrm_cfa_ntuple_filter_cfg_output { u8 valid; }; +/* hwrm_cfa_vfr_alloc */ +/* Input (32 bytes) */ +struct hwrm_cfa_vfr_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 vf_id; + __le16 reserved; + __le32 unused_0; + char vfr_name[32]; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_vfr_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 rx_cfa_code; + __le16 tx_cfa_action; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; +}; + +/* hwrm_cfa_vfr_free */ +/* Input (24 bytes) */ +struct hwrm_cfa_vfr_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + char vfr_name[32]; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_vfr_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_tunnel_dst_port_query */ /* Input (24 bytes) */ struct hwrm_tunnel_dst_port_query_input { @@ -4448,12 +4683,13 @@ struct hwrm_fw_reset_input { #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL u8 selfrst_status; #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL - __le16 unused_0[3]; + u8 host_idx; + u8 unused_0[5]; }; /* Output (16 bytes) */ @@ -4487,7 +4723,7 @@ struct hwrm_fw_qstatus_input { #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL u8 unused_0[7]; }; @@ -4572,6 +4808,16 @@ struct hwrm_fw_set_structured_data_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_fw_set_structured_data_cmd_err { + u8 code; + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + u8 unused_0[7]; +}; + /* hwrm_fw_get_structured_data */ /* Input (32 bytes) */ struct hwrm_fw_get_structured_data_input { @@ -4611,6 +4857,14 @@ struct hwrm_fw_get_structured_data_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_fw_get_structured_data_cmd_err { + u8 code; + #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + u8 unused_0[7]; +}; + /* hwrm_exec_fwd_resp */ /* Input (128 bytes) */ struct hwrm_exec_fwd_resp_input { @@ -5411,7 +5665,7 @@ struct cmd_nums { #define HWRM_PORT_LPBK_CLR_STATS (0x26UL) #define HWRM_PORT_PHY_QCFG (0x27UL) #define HWRM_PORT_MAC_QCFG (0x28UL) - #define RESERVED7 (0x29UL) + #define HWRM_PORT_MAC_PTP_QCFG (0x29UL) #define HWRM_PORT_PHY_QCAPS (0x2aUL) #define HWRM_PORT_PHY_I2C_WRITE (0x2bUL) #define HWRM_PORT_PHY_I2C_READ (0x2cUL) @@ -5421,14 +5675,17 @@ struct cmd_nums { #define HWRM_QUEUE_QPORTCFG (0x30UL) #define HWRM_QUEUE_QCFG (0x31UL) #define HWRM_QUEUE_CFG (0x32UL) - #define RESERVED2 (0x33UL) - #define RESERVED3 (0x34UL) + #define HWRM_FUNC_VLAN_CFG (0x33UL) + #define HWRM_FUNC_VLAN_QCFG (0x34UL) #define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL) #define HWRM_QUEUE_PFCENABLE_CFG (0x36UL) #define HWRM_QUEUE_PRI2COS_QCFG (0x37UL) #define HWRM_QUEUE_PRI2COS_CFG (0x38UL) #define HWRM_QUEUE_COS2BW_QCFG (0x39UL) #define HWRM_QUEUE_COS2BW_CFG (0x3aUL) + #define HWRM_QUEUE_DSCP_QCAPS (0x3bUL) + #define HWRM_QUEUE_DSCP2PRI_QCFG (0x3cUL) + #define HWRM_QUEUE_DSCP2PRI_CFG (0x3dUL) #define HWRM_VNIC_ALLOC (0x40UL) #define HWRM_VNIC_FREE (0x41UL) #define HWRM_VNIC_CFG (0x42UL) @@ -5455,7 +5712,7 @@ struct cmd_nums { #define HWRM_CFA_L2_FILTER_FREE (0x91UL) #define HWRM_CFA_L2_FILTER_CFG (0x92UL) #define HWRM_CFA_L2_SET_RX_MASK (0x93UL) - #define RESERVED4 (0x94UL) + #define HWRM_CFA_VLAN_ANTISPOOF_CFG (0x94UL) #define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL) #define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL) #define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL) @@ -5494,6 +5751,8 @@ struct cmd_nums { #define HWRM_CFA_METER_PROFILE_CFG (0xf7UL) #define HWRM_CFA_METER_INSTANCE_ALLOC (0xf8UL) #define HWRM_CFA_METER_INSTANCE_FREE (0xf9UL) + #define HWRM_CFA_VFR_ALLOC (0xfdUL) + #define HWRM_CFA_VFR_FREE (0xfeUL) #define HWRM_CFA_VF_PAIR_ALLOC (0x100UL) #define HWRM_CFA_VF_PAIR_FREE (0x101UL) #define HWRM_CFA_VF_PAIR_INFO (0x102UL) @@ -5502,6 +5761,9 @@ struct cmd_nums { #define HWRM_CFA_FLOW_FLUSH (0x105UL) #define HWRM_CFA_FLOW_STATS (0x106UL) #define HWRM_CFA_FLOW_INFO (0x107UL) + #define HWRM_CFA_DECAP_FILTER_ALLOC (0x108UL) + #define HWRM_CFA_DECAP_FILTER_FREE (0x109UL) + #define HWRM_CFA_VLAN_ANTISPOOF_QCFG (0x10aUL) #define HWRM_SELFTEST_QLIST (0x200UL) #define HWRM_SELFTEST_EXEC (0x201UL) #define HWRM_SELFTEST_IRQ (0x202UL) @@ -5510,6 +5772,8 @@ struct cmd_nums { #define HWRM_DBG_WRITE_DIRECT (0xff12UL) #define HWRM_DBG_WRITE_INDIRECT (0xff13UL) #define HWRM_DBG_DUMP (0xff14UL) + #define HWRM_DBG_ERASE_NVM (0xff15UL) + #define HWRM_DBG_CFG (0xff16UL) #define HWRM_NVM_FACTORY_DEFAULTS (0xffeeUL) #define HWRM_NVM_VALIDATE_OPTION (0xffefUL) #define HWRM_NVM_FLUSH (0xfff0UL) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index b8e7248294d9..d37925a8a65b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -18,6 +18,7 @@ #include "bnxt.h" #include "bnxt_ulp.h" #include "bnxt_sriov.h" +#include "bnxt_vfr.h" #include "bnxt_ethtool.h" #ifdef CONFIG_BNXT_SRIOV @@ -587,6 +588,10 @@ void bnxt_sriov_disable(struct bnxt *bp) if (!num_vfs) return; + /* synchronize VF and VF-rep create and destroy */ + mutex_lock(&bp->sriov_lock); + bnxt_vf_reps_destroy(bp); + if (pci_vfs_assigned(bp->pdev)) { bnxt_hwrm_fwd_async_event_cmpl( bp, NULL, ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD); @@ -597,6 +602,7 @@ void bnxt_sriov_disable(struct bnxt *bp) /* Free the HW resources reserved for various VF's */ bnxt_hwrm_func_vf_resource_free(bp, num_vfs); } + mutex_unlock(&bp->sriov_lock); bnxt_free_vf_resources(bp); @@ -794,8 +800,10 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) PORT_PHY_QCFG_RESP_LINK_LINK; phy_qcfg_resp.link_speed = cpu_to_le16( PORT_PHY_QCFG_RESP_LINK_SPEED_10GB); - phy_qcfg_resp.duplex = - PORT_PHY_QCFG_RESP_DUPLEX_FULL; + phy_qcfg_resp.duplex_cfg = + PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL; + phy_qcfg_resp.duplex_state = + PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL; phy_qcfg_resp.pause = (PORT_PHY_QCFG_RESP_PAUSE_TX | PORT_PHY_QCFG_RESP_PAUSE_RX); @@ -804,7 +812,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) /* force link down */ phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_NO_LINK; phy_qcfg_resp.link_speed = 0; - phy_qcfg_resp.duplex = PORT_PHY_QCFG_RESP_DUPLEX_HALF; + phy_qcfg_resp.duplex_state = + PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF; phy_qcfg_resp.pause = 0; } rc = bnxt_hwrm_fwd_resp(bp, vf, &phy_qcfg_resp, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c new file mode 100644 index 000000000000..b05c5d0ee3f9 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -0,0 +1,495 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2016-2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/jhash.h> + +#include "bnxt_hsi.h" +#include "bnxt.h" +#include "bnxt_vfr.h" + +#ifdef CONFIG_BNXT_SRIOV + +#define CFA_HANDLE_INVALID 0xffff +#define VF_IDX_INVALID 0xffff + +static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx, + u16 *tx_cfa_action, u16 *rx_cfa_code) +{ + struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_vfr_alloc_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1); + req.vf_id = cpu_to_le16(vf_idx); + sprintf(req.vfr_name, "vfr%d", vf_idx); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action); + *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code); + netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x", + *tx_cfa_action, *rx_cfa_code); + } else { + netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + } + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + +static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx) +{ + struct hwrm_cfa_vfr_free_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1); + sprintf(req.vfr_name, "vfr%d", vf_idx); + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + return rc; +} + +static int bnxt_vf_rep_open(struct net_device *dev) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + struct bnxt *bp = vf_rep->bp; + + /* Enable link and TX only if the parent PF is open. */ + if (netif_running(bp->dev)) { + netif_carrier_on(dev); + netif_tx_start_all_queues(dev); + } + return 0; +} + +static int bnxt_vf_rep_close(struct net_device *dev) +{ + netif_carrier_off(dev); + netif_tx_disable(dev); + + return 0; +} + +static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + int rc, len = skb->len; + + skb_dst_drop(skb); + dst_hold((struct dst_entry *)vf_rep->dst); + skb_dst_set(skb, (struct dst_entry *)vf_rep->dst); + skb->dev = vf_rep->dst->u.port_info.lower_dev; + + rc = dev_queue_xmit(skb); + if (!rc) { + vf_rep->tx_stats.packets++; + vf_rep->tx_stats.bytes += len; + } + return rc; +} + +static void +bnxt_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + + stats->rx_packets = vf_rep->rx_stats.packets; + stats->rx_bytes = vf_rep->rx_stats.bytes; + stats->tx_packets = vf_rep->tx_stats.packets; + stats->tx_bytes = vf_rep->tx_stats.bytes; +} + +struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code) +{ + u16 vf_idx; + + if (cfa_code && bp->cfa_code_map && BNXT_PF(bp)) { + vf_idx = bp->cfa_code_map[cfa_code]; + if (vf_idx != VF_IDX_INVALID) + return bp->vf_reps[vf_idx]->dev; + } + return NULL; +} + +void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev); + struct bnxt_vf_rep_stats *rx_stats; + + rx_stats = &vf_rep->rx_stats; + vf_rep->rx_stats.bytes += skb->len; + vf_rep->rx_stats.packets++; + + netif_receive_skb(skb); +} + +static int bnxt_vf_rep_get_phys_port_name(struct net_device *dev, char *buf, + size_t len) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + struct pci_dev *pf_pdev = vf_rep->bp->pdev; + int rc; + + rc = snprintf(buf, len, "pf%dvf%d", PCI_FUNC(pf_pdev->devfn), + vf_rep->vf_idx); + if (rc >= len) + return -EOPNOTSUPP; + return 0; +} + +static void bnxt_vf_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); +} + +static int bnxt_vf_rep_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + + /* as only PORT_PARENT_ID is supported currently use common code + * between PF and VF-rep for now. + */ + return bnxt_port_attr_get(vf_rep->bp, attr); +} + +static const struct switchdev_ops bnxt_vf_rep_switchdev_ops = { + .switchdev_port_attr_get = bnxt_vf_rep_port_attr_get +}; + +static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = { + .get_drvinfo = bnxt_vf_rep_get_drvinfo +}; + +static const struct net_device_ops bnxt_vf_rep_netdev_ops = { + .ndo_open = bnxt_vf_rep_open, + .ndo_stop = bnxt_vf_rep_close, + .ndo_start_xmit = bnxt_vf_rep_xmit, + .ndo_get_stats64 = bnxt_vf_rep_get_stats64, + .ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name +}; + +/* Called when the parent PF interface is closed: + * As the mode transition from SWITCHDEV to LEGACY + * happens under the rtnl_lock() this routine is safe + * under the rtnl_lock() + */ +void bnxt_vf_reps_close(struct bnxt *bp) +{ + struct bnxt_vf_rep *vf_rep; + u16 num_vfs, i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + num_vfs = pci_num_vf(bp->pdev); + for (i = 0; i < num_vfs; i++) { + vf_rep = bp->vf_reps[i]; + if (netif_running(vf_rep->dev)) + bnxt_vf_rep_close(vf_rep->dev); + } +} + +/* Called when the parent PF interface is opened (re-opened): + * As the mode transition from SWITCHDEV to LEGACY + * happen under the rtnl_lock() this routine is safe + * under the rtnl_lock() + */ +void bnxt_vf_reps_open(struct bnxt *bp) +{ + int i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + for (i = 0; i < pci_num_vf(bp->pdev); i++) + bnxt_vf_rep_open(bp->vf_reps[i]->dev); +} + +static void __bnxt_vf_reps_destroy(struct bnxt *bp) +{ + u16 num_vfs = pci_num_vf(bp->pdev); + struct bnxt_vf_rep *vf_rep; + int i; + + for (i = 0; i < num_vfs; i++) { + vf_rep = bp->vf_reps[i]; + if (vf_rep) { + dst_release((struct dst_entry *)vf_rep->dst); + + if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) + hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); + + if (vf_rep->dev) { + /* if register_netdev failed, then netdev_ops + * would have been set to NULL + */ + if (vf_rep->dev->netdev_ops) + unregister_netdev(vf_rep->dev); + free_netdev(vf_rep->dev); + } + } + } + + kfree(bp->vf_reps); + bp->vf_reps = NULL; +} + +void bnxt_vf_reps_destroy(struct bnxt *bp) +{ + bool closed = false; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + if (!bp->vf_reps) + return; + + /* Ensure that parent PF's and VF-reps' RX/TX has been quiesced + * before proceeding with VF-rep cleanup. + */ + rtnl_lock(); + if (netif_running(bp->dev)) { + bnxt_close_nic(bp, false, false); + closed = true; + } + /* un-publish cfa_code_map so that RX path can't see it anymore */ + kfree(bp->cfa_code_map); + bp->cfa_code_map = NULL; + bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + + if (closed) + bnxt_open_nic(bp, false, false); + rtnl_unlock(); + + /* Need to call vf_reps_destroy() outside of rntl_lock + * as unregister_netdev takes rtnl_lock + */ + __bnxt_vf_reps_destroy(bp); +} + +/* Use the OUI of the PF's perm addr and report the same mac addr + * for the same VF-rep each time + */ +static void bnxt_vf_rep_eth_addr_gen(u8 *src_mac, u16 vf_idx, u8 *mac) +{ + u32 addr; + + ether_addr_copy(mac, src_mac); + + addr = jhash(src_mac, ETH_ALEN, 0) + vf_idx; + mac[3] = (u8)(addr & 0xFF); + mac[4] = (u8)((addr >> 8) & 0xFF); + mac[5] = (u8)((addr >> 16) & 0xFF); +} + +static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, + struct net_device *dev) +{ + struct net_device *pf_dev = bp->dev; + + dev->netdev_ops = &bnxt_vf_rep_netdev_ops; + dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; + SWITCHDEV_SET_OPS(dev, &bnxt_vf_rep_switchdev_ops); + /* Just inherit all the featues of the parent PF as the VF-R + * uses the RX/TX rings of the parent PF + */ + dev->hw_features = pf_dev->hw_features; + dev->gso_partial_features = pf_dev->gso_partial_features; + dev->vlan_features = pf_dev->vlan_features; + dev->hw_enc_features = pf_dev->hw_enc_features; + dev->features |= pf_dev->features; + bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx, + dev->perm_addr); + ether_addr_copy(dev->dev_addr, dev->perm_addr); +} + +static int bnxt_vf_reps_create(struct bnxt *bp) +{ + u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev); + struct bnxt_vf_rep *vf_rep; + struct net_device *dev; + int rc, i; + + bp->vf_reps = kcalloc(num_vfs, sizeof(vf_rep), GFP_KERNEL); + if (!bp->vf_reps) + return -ENOMEM; + + /* storage for cfa_code to vf-idx mapping */ + cfa_code_map = kmalloc(sizeof(*bp->cfa_code_map) * MAX_CFA_CODE, + GFP_KERNEL); + if (!cfa_code_map) { + rc = -ENOMEM; + goto err; + } + for (i = 0; i < MAX_CFA_CODE; i++) + cfa_code_map[i] = VF_IDX_INVALID; + + for (i = 0; i < num_vfs; i++) { + dev = alloc_etherdev(sizeof(*vf_rep)); + if (!dev) { + rc = -ENOMEM; + goto err; + } + + vf_rep = netdev_priv(dev); + bp->vf_reps[i] = vf_rep; + vf_rep->dev = dev; + vf_rep->bp = bp; + vf_rep->vf_idx = i; + vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; + + /* get cfa handles from FW */ + rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, + &vf_rep->tx_cfa_action, + &vf_rep->rx_cfa_code); + if (rc) { + rc = -ENOLINK; + goto err; + } + cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; + + vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!vf_rep->dst) { + rc = -ENOMEM; + goto err; + } + /* only cfa_action is needed to mux a packet while TXing */ + vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; + vf_rep->dst->u.port_info.lower_dev = bp->dev; + + bnxt_vf_rep_netdev_init(bp, vf_rep, dev); + rc = register_netdev(dev); + if (rc) { + /* no need for unregister_netdev in cleanup */ + dev->netdev_ops = NULL; + goto err; + } + } + + /* publish cfa_code_map only after all VF-reps have been initialized */ + bp->cfa_code_map = cfa_code_map; + bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + netif_keep_dst(bp->dev); + return 0; + +err: + netdev_info(bp->dev, "%s error=%d", __func__, rc); + kfree(cfa_code_map); + __bnxt_vf_reps_destroy(bp); + return rc; +} + +/* Devlink related routines */ +static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(devlink); + + *mode = bp->eswitch_mode; + return 0; +} + +static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(devlink); + int rc = 0; + + mutex_lock(&bp->sriov_lock); + if (bp->eswitch_mode == mode) { + netdev_info(bp->dev, "already in %s eswitch mode", + mode == DEVLINK_ESWITCH_MODE_LEGACY ? + "legacy" : "switchdev"); + rc = -EINVAL; + goto done; + } + + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + bnxt_vf_reps_destroy(bp); + break; + + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + if (pci_num_vf(bp->pdev) == 0) { + netdev_info(bp->dev, + "Enable VFs before setting swtichdev mode"); + rc = -EPERM; + goto done; + } + rc = bnxt_vf_reps_create(bp); + break; + + default: + rc = -EINVAL; + goto done; + } +done: + mutex_unlock(&bp->sriov_lock); + return rc; +} + +static const struct devlink_ops bnxt_dl_ops = { + .eswitch_mode_set = bnxt_dl_eswitch_mode_set, + .eswitch_mode_get = bnxt_dl_eswitch_mode_get +}; + +int bnxt_dl_register(struct bnxt *bp) +{ + struct devlink *dl; + int rc; + + if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) + return 0; + + if (bp->hwrm_spec_code < 0x10800) { + netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); + return -ENOTSUPP; + } + + dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + if (!dl) { + netdev_warn(bp->dev, "devlink_alloc failed"); + return -ENOMEM; + } + + bnxt_link_bp_to_dl(dl, bp); + bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + rc = devlink_register(dl, &bp->pdev->dev); + if (rc) { + bnxt_link_bp_to_dl(dl, NULL); + devlink_free(dl); + netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); + return rc; + } + + return 0; +} + +void bnxt_dl_unregister(struct bnxt *bp) +{ + struct devlink *dl = bp->dl; + + if (!dl) + return; + + devlink_unregister(dl); + devlink_free(dl); +} + +#endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h new file mode 100644 index 000000000000..e55a3b693e20 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -0,0 +1,72 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2016-2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_VFR_H +#define BNXT_VFR_H + +#ifdef CONFIG_BNXT_SRIOV + +#define MAX_CFA_CODE 65536 + +/* Struct to hold housekeeping info needed by devlink interface */ +struct bnxt_dl { + struct bnxt *bp; /* back ptr to the controlling dev */ +}; + +static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) +{ + return ((struct bnxt_dl *)devlink_priv(dl))->bp; +} + +static inline void bnxt_link_bp_to_dl(struct devlink *dl, struct bnxt *bp) +{ + struct bnxt_dl *bp_dl = devlink_priv(dl); + + bp_dl->bp = bp; + if (bp) + bp->dl = dl; +} + +int bnxt_dl_register(struct bnxt *bp); +void bnxt_dl_unregister(struct bnxt *bp); +void bnxt_vf_reps_destroy(struct bnxt *bp); +void bnxt_vf_reps_close(struct bnxt *bp); +void bnxt_vf_reps_open(struct bnxt *bp); +void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb); +struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code); + +#else + +static inline int bnxt_dl_register(struct bnxt *bp) +{ + return 0; +} + +static inline void bnxt_dl_unregister(struct bnxt *bp) +{ +} + +static inline void bnxt_vf_reps_close(struct bnxt *bp) +{ +} + +static inline void bnxt_vf_reps_open(struct bnxt *bp) +{ +} + +static inline void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb) +{ +} + +static inline struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code) +{ + return NULL; +} +#endif /* CONFIG_BNXT_SRIOV */ +#endif /* BNXT_VFR_H */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 3a34fdba5301..b1fdd3cc10d1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -657,6 +657,7 @@ struct bcmgenet_priv { struct clk *clk; struct platform_device *pdev; + struct platform_device *mii_pdev; /* WOL */ struct clk *clk_wol; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 30cb97b4a1d7..18f5723be2c9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -24,62 +24,10 @@ #include <linux/of_net.h> #include <linux/of_mdio.h> #include <linux/platform_data/bcmgenet.h> +#include <linux/platform_data/mdio-bcm-unimac.h> #include "bcmgenet.h" -/* read a value from the MII */ -static int bcmgenet_mii_read(struct mii_bus *bus, int phy_id, int location) -{ - int ret; - struct net_device *dev = bus->priv; - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 reg; - - bcmgenet_umac_writel(priv, (MDIO_RD | (phy_id << MDIO_PMD_SHIFT) | - (location << MDIO_REG_SHIFT)), UMAC_MDIO_CMD); - /* Start MDIO transaction*/ - reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD); - reg |= MDIO_START_BUSY; - bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD); - wait_event_timeout(priv->wq, - !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD) - & MDIO_START_BUSY), - HZ / 100); - ret = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD); - - /* Some broken devices are known not to release the line during - * turn-around, e.g: Broadcom BCM53125 external switches, so check for - * that condition here and ignore the MDIO controller read failure - * indication. - */ - if (!(bus->phy_ignore_ta_mask & 1 << phy_id) && (ret & MDIO_READ_FAIL)) - return -EIO; - - return ret & 0xffff; -} - -/* write a value to the MII */ -static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, - int location, u16 val) -{ - struct net_device *dev = bus->priv; - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 reg; - - bcmgenet_umac_writel(priv, (MDIO_WR | (phy_id << MDIO_PMD_SHIFT) | - (location << MDIO_REG_SHIFT) | (0xffff & val)), - UMAC_MDIO_CMD); - reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD); - reg |= MDIO_START_BUSY; - bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD); - wait_event_timeout(priv->wq, - !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD) & - MDIO_START_BUSY), - HZ / 100); - - return 0; -} - /* setup netdev link state when PHY link status change and * update UMAC and RGMII block when link up */ @@ -393,104 +341,121 @@ int bcmgenet_mii_probe(struct net_device *dev) return 0; } -/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with - * their internal MDIO management controller making them fail to successfully - * be read from or written to for the first transaction. We insert a dummy - * BMSR read here to make sure that phy_get_device() and get_phy_id() can - * correctly read the PHY MII_PHYSID1/2 registers and successfully register a - * PHY device for this peripheral. - * - * Once the PHY driver is registered, we can workaround subsequent reads from - * there (e.g: during system-wide power management). - * - * bus->reset is invoked before mdiobus_scan during mdiobus_register and is - * therefore the right location to stick that workaround. Since we do not want - * to read from non-existing PHYs, we either use bus->phy_mask or do a manual - * Device Tree scan to limit the search area. - */ -static int bcmgenet_mii_bus_reset(struct mii_bus *bus) +static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv) { - struct net_device *dev = bus->priv; - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *np = priv->mdio_dn; - struct device_node *child = NULL; - u32 read_mask = 0; - int addr = 0; + struct device_node *dn = priv->pdev->dev.of_node; + struct device *kdev = &priv->pdev->dev; + char *compat; - if (!np) { - read_mask = 1 << priv->phy_addr; - } else { - for_each_available_child_of_node(np, child) { - addr = of_mdio_parse_addr(&dev->dev, child); - if (addr < 0) - continue; + compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version); + if (!compat) + return NULL; - read_mask |= 1 << addr; - } + priv->mdio_dn = of_find_compatible_node(dn, NULL, compat); + kfree(compat); + if (!priv->mdio_dn) { + dev_err(kdev, "unable to find MDIO bus node\n"); + return NULL; } - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - if (read_mask & 1 << addr) { - dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr); - mdiobus_read(bus, addr, MII_BMSR); - } + return priv->mdio_dn; +} + +static void bcmgenet_mii_pdata_init(struct bcmgenet_priv *priv, + struct unimac_mdio_pdata *ppd) +{ + struct device *kdev = &priv->pdev->dev; + struct bcmgenet_platform_data *pd = kdev->platform_data; + + if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { + /* + * Internal or external PHY with MDIO access + */ + if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) + ppd->phy_mask = 1 << pd->phy_address; + else + ppd->phy_mask = 0; } +} +static int bcmgenet_mii_wait(void *wait_func_data) +{ + struct bcmgenet_priv *priv = wait_func_data; + + wait_event_timeout(priv->wq, + !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD) + & MDIO_START_BUSY), + HZ / 100); return 0; } -static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv) +static int bcmgenet_mii_register(struct bcmgenet_priv *priv) { - struct mii_bus *bus; + struct platform_device *pdev = priv->pdev; + struct bcmgenet_platform_data *pdata = pdev->dev.platform_data; + struct device_node *dn = pdev->dev.of_node; + struct unimac_mdio_pdata ppd; + struct platform_device *ppdev; + struct resource *pres, res; + int id, ret; + + pres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + memset(&res, 0, sizeof(res)); + memset(&ppd, 0, sizeof(ppd)); + + ppd.wait_func = bcmgenet_mii_wait; + ppd.wait_func_data = priv; + ppd.bus_name = "bcmgenet MII bus"; + + /* Unimac MDIO bus controller starts at UniMAC offset + MDIO_CMD + * and is 2 * 32-bits word long, 8 bytes total. + */ + res.start = pres->start + GENET_UMAC_OFF + UMAC_MDIO_CMD; + res.end = res.start + 8; + res.flags = IORESOURCE_MEM; - if (priv->mii_bus) - return 0; + if (dn) + id = of_alias_get_id(dn, "eth"); + else + id = pdev->id; - priv->mii_bus = mdiobus_alloc(); - if (!priv->mii_bus) { - pr_err("failed to allocate\n"); + ppdev = platform_device_alloc(UNIMAC_MDIO_DRV_NAME, id); + if (!ppdev) return -ENOMEM; - } - bus = priv->mii_bus; - bus->priv = priv->dev; - bus->name = "bcmgenet MII bus"; - bus->parent = &priv->pdev->dev; - bus->read = bcmgenet_mii_read; - bus->write = bcmgenet_mii_write; - bus->reset = bcmgenet_mii_bus_reset; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", - priv->pdev->name, priv->pdev->id); + /* Retain this platform_device pointer for later cleanup */ + priv->mii_pdev = ppdev; + ppdev->dev.parent = &pdev->dev; + ppdev->dev.of_node = bcmgenet_mii_of_find_mdio(priv); + if (pdata) + bcmgenet_mii_pdata_init(priv, &ppd); + + ret = platform_device_add_resources(ppdev, &res, 1); + if (ret) + goto out; + + ret = platform_device_add_data(ppdev, &ppd, sizeof(ppd)); + if (ret) + goto out; + + ret = platform_device_add(ppdev); + if (ret) + goto out; return 0; +out: + platform_device_put(ppdev); + return ret; } static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) { struct device_node *dn = priv->pdev->dev.of_node; struct device *kdev = &priv->pdev->dev; - struct phy_device *phydev = NULL; - char *compat; + struct phy_device *phydev; int phy_mode; int ret; - compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version); - if (!compat) - return -ENOMEM; - - priv->mdio_dn = of_find_compatible_node(dn, NULL, compat); - kfree(compat); - if (!priv->mdio_dn) { - dev_err(kdev, "unable to find MDIO bus node\n"); - return -ENODEV; - } - - ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn); - if (ret) { - dev_err(kdev, "failed to register MDIO bus\n"); - return ret; - } - /* Fetch the PHY phandle */ priv->phy_dn = of_parse_phandle(dn, "phy-handle", 0); @@ -537,33 +502,23 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; struct bcmgenet_platform_data *pd = kdev->platform_data; - struct mii_bus *mdio = priv->mii_bus; + char phy_name[MII_BUS_ID_SIZE + 3]; + char mdio_bus_id[MII_BUS_ID_SIZE]; struct phy_device *phydev; - int ret; + + snprintf(mdio_bus_id, MII_BUS_ID_SIZE, "%s-%d", + UNIMAC_MDIO_DRV_NAME, priv->pdev->id); if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { + snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, + mdio_bus_id, pd->phy_address); + /* * Internal or external PHY with MDIO access */ - if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) - mdio->phy_mask = ~(1 << pd->phy_address); - else - mdio->phy_mask = 0; - - ret = mdiobus_register(mdio); - if (ret) { - dev_err(kdev, "failed to register MDIO bus\n"); - return ret; - } - - if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) - phydev = mdiobus_get_phy(mdio, pd->phy_address); - else - phydev = phy_find_first(mdio); - + phydev = phy_attach(priv->dev, phy_name, pd->phy_interface); if (!phydev) { dev_err(kdev, "failed to register PHY device\n"); - mdiobus_unregister(mdio); return -ENODEV; } } else { @@ -609,10 +564,9 @@ static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv) int bcmgenet_mii_init(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *dn = priv->pdev->dev.of_node; int ret; - ret = bcmgenet_mii_alloc(priv); + ret = bcmgenet_mii_register(priv); if (ret) return ret; @@ -623,11 +577,7 @@ int bcmgenet_mii_init(struct net_device *dev) return 0; out: - if (of_phy_is_fixed_link(dn)) - of_phy_deregister_fixed_link(dn); - of_node_put(priv->phy_dn); - mdiobus_unregister(priv->mii_bus); - mdiobus_free(priv->mii_bus); + bcmgenet_mii_exit(dev); return ret; } @@ -639,6 +589,6 @@ void bcmgenet_mii_exit(struct net_device *dev) if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); - mdiobus_unregister(priv->mii_bus); - mdiobus_free(priv->mii_bus); + platform_device_unregister(priv->mii_pdev); + platform_device_put(priv->mii_pdev); } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 26d25749c3e4..6df2cad61647 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -68,7 +68,7 @@ #define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU -#define MACB_NETIF_LSO (NETIF_F_TSO | NETIF_F_UFO) +#define MACB_NETIF_LSO NETIF_F_TSO #define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0) #define MACB_WOL_ENABLED (0x1 << 1) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 9906fda76087..248a8fc45069 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -128,7 +128,7 @@ static void macb_remove(struct pci_dev *pdev) clk_unregister(plat_data->hclk); } -static struct pci_device_id dev_id_table[] = { +static const struct pci_device_id dev_id_table[] = { { PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), }, { 0, } }; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index ebd353bc78ff..09e287597c74 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -105,6 +105,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "tx_total_sent", "tx_total_fwd", "tx_err_pko", + "tx_err_pki", "tx_err_link", "tx_err_drop", @@ -826,6 +827,8 @@ lio_get_ethtool_stats(struct net_device *netdev, data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_total_fwd); /*per_core_stats[j].link_stats[i].fromhost.fw_err_pko */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pko); + /*per_core_stats[j].link_stats[i].fromhost.fw_err_pki */ + data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pki); /*per_core_stats[j].link_stats[i].fromhost.fw_err_link */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_link); /*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost. @@ -1568,6 +1571,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev, tstats->fw_total_sent = rsp_tstats->fw_total_sent; tstats->fw_total_fwd = rsp_tstats->fw_total_fwd; tstats->fw_err_pko = rsp_tstats->fw_err_pko; + tstats->fw_err_pki = rsp_tstats->fw_err_pki; tstats->fw_err_link = rsp_tstats->fw_err_link; tstats->fw_err_drop = rsp_tstats->fw_err_drop; tstats->fw_tso = rsp_tstats->fw_tso; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 51583ae4b1eb..1d8fefa9ce64 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2544,8 +2544,8 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, { struct octeon_droq_ops droq_ops; struct net_device *netdev; - static int cpu_id; - static int cpu_id_modulus; + int cpu_id; + int cpu_id_modulus; struct octeon_droq *droq; struct napi_struct *napi; int q, q_no, retval = 0; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 9b247102eb92..935ff299cdd9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1663,10 +1663,10 @@ static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx) { struct octeon_droq_ops droq_ops; struct net_device *netdev; - static int cpu_id_modulus; + int cpu_id_modulus; struct octeon_droq *droq; struct napi_struct *napi; - static int cpu_id; + int cpu_id; int num_tx_descs; struct lio *lio; int retval = 0; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 231dd7fbfb80..3b9e3646b971 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -27,8 +27,8 @@ #define LIQUIDIO_PACKAGE "" #define LIQUIDIO_BASE_MAJOR_VERSION 1 -#define LIQUIDIO_BASE_MINOR_VERSION 5 -#define LIQUIDIO_BASE_MICRO_VERSION 1 +#define LIQUIDIO_BASE_MINOR_VERSION 6 +#define LIQUIDIO_BASE_MICRO_VERSION 0 #define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ __stringify(LIQUIDIO_BASE_MINOR_VERSION) #define LIQUIDIO_MICRO_VERSION "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) @@ -768,6 +768,7 @@ struct nic_rx_stats { /* firmware stats */ u64 fw_total_rcvd; u64 fw_total_fwd; + u64 fw_total_fwd_bytes; u64 fw_err_pko; u64 fw_err_link; u64 fw_err_drop; @@ -814,6 +815,7 @@ struct nic_tx_stats { u64 fw_tso; /* number of tso requests */ u64 fw_tso_fwd; /* number of packets segmented in tso */ u64 fw_tx_vxlan; + u64 fw_err_pki; }; struct oct_link_stats { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index e08f7600f986..15ad1ab2c0c7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -42,9 +42,6 @@ module_param(console_bitmask, int, 0644); MODULE_PARM_DESC(console_bitmask, "Bitmask indicating which consoles have debug output redirected to syslog."); -#define MIN(a, b) min((a), (b)) -#define CAST_ULL(v) ((u64)(v)) - #define BOOTLOADER_PCI_READ_BUFFER_DATA_ADDR 0x0006c008 #define BOOTLOADER_PCI_READ_BUFFER_LEN_ADDR 0x0006c004 #define BOOTLOADER_PCI_READ_BUFFER_OWNER_ADDR 0x0006c000 @@ -234,7 +231,7 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct, (exact_match && major_version != exact_match)) { dev_err(&oct->pci_dev->dev, "bootmem ver mismatch %d.%d addr:0x%llx\n", major_version, minor_version, - CAST_ULL(oct->bootmem_desc_addr)); + (long long)oct->bootmem_desc_addr); return -1; } else { return 0; @@ -704,7 +701,7 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num, if (bytes_to_read <= 0) return bytes_to_read; - bytes_to_read = MIN(bytes_to_read, (s32)buf_size); + bytes_to_read = min_t(s32, bytes_to_read, buf_size); /* Check to see if what we want to read is not contiguous, and limit * ourselves to the contiguous block diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 623e28ca736e..f10014f7ae88 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -876,11 +876,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->num_iqs = 0; - oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]), + oct->instr_queue[0] = vzalloc_node(sizeof(*oct->instr_queue[0]), numa_node); if (!oct->instr_queue[0]) oct->instr_queue[0] = - vmalloc(sizeof(struct octeon_instr_queue)); + vzalloc(sizeof(struct octeon_instr_queue)); if (!oct->instr_queue[0]) return 1; memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue)); @@ -923,9 +923,9 @@ int octeon_setup_output_queues(struct octeon_device *oct) desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf)); } oct->num_oqs = 0; - oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node); + oct->droq[0] = vzalloc_node(sizeof(*oct->droq[0]), numa_node); if (!oct->droq[0]) - oct->droq[0] = vmalloc(sizeof(*oct->droq[0])); + oct->droq[0] = vzalloc(sizeof(*oct->droq[0])); if (!oct->droq[0]) return 1; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index c90ed48ae8ab..ad464788c923 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -22,6 +22,8 @@ #ifndef _OCTEON_DEVICE_H_ #define _OCTEON_DEVICE_H_ +#include <linux/interrupt.h> + /** PCI VendorId Device Id */ #define OCTEON_CN68XX_PCIID 0x91177d #define OCTEON_CN66XX_PCIID 0x92177d diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 2e190deb2233..f7b5d68eb4cf 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -145,6 +145,8 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct, for (i = 0; i < droq->max_count; i++) { pg_info = &droq->recv_buf_list[i].pg_info; + if (!pg_info) + continue; if (pg_info->dma) lio_unmap_ring(oct->pci_dev, @@ -275,12 +277,12 @@ int octeon_init_droq(struct octeon_device *oct, droq->max_count); droq->recv_buf_list = (struct octeon_recv_buffer *) - vmalloc_node(droq->max_count * + vzalloc_node(droq->max_count * OCT_DROQ_RECVBUF_SIZE, numa_node); if (!droq->recv_buf_list) droq->recv_buf_list = (struct octeon_recv_buffer *) - vmalloc(droq->max_count * + vzalloc(droq->max_count * OCT_DROQ_RECVBUF_SIZE); if (!droq->recv_buf_list) { dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 0bc6a4ffce30..6a015362c340 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -793,7 +793,9 @@ static struct attribute *cxgb3_attrs[] = { NULL }; -static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs }; +static const struct attribute_group cxgb3_attr_group = { + .attrs = cxgb3_attrs, +}; static ssize_t tm_attr_show(struct device *d, char *buf, int sched) @@ -880,7 +882,9 @@ static struct attribute *offload_attrs[] = { NULL }; -static struct attribute_group offload_attr_group = {.attrs = offload_attrs }; +static const struct attribute_group offload_attr_group = { + .attrs = offload_attrs, +}; /* * Sends an sk_buff to an offload queue driver diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ef4be781fd05..1978abbc6ceb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -338,10 +338,12 @@ struct adapter_params { unsigned int sf_nsec; /* # of flash sectors */ unsigned int sf_fw_start; /* start of FW image in flash */ - unsigned int fw_vers; - unsigned int bs_vers; /* bootstrap version */ - unsigned int tp_vers; - unsigned int er_vers; /* expansion ROM version */ + unsigned int fw_vers; /* firmware version */ + unsigned int bs_vers; /* bootstrap version */ + unsigned int tp_vers; /* TP microcode version */ + unsigned int er_vers; /* expansion ROM version */ + unsigned int scfg_vers; /* Serial Configuration version */ + unsigned int vpd_vers; /* VPD Version */ u8 api_vers[7]; unsigned short mtus[NMTUS]; @@ -1407,6 +1409,10 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_bs_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); +int t4_get_scfg_version(struct adapter *adapter, u32 *vers); +int t4_get_vpd_version(struct adapter *adapter, u32 *vers); +int t4_get_version_info(struct adapter *adapter); +void t4_dump_version_info(struct adapter *adapter); int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, const u8 *fw_data, unsigned int fw_size, struct fw_hdr *card_fw, enum dev_state state, int *reset); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 26eb00a45db1..03f593e84c24 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -801,6 +801,104 @@ static int set_link_ksettings(struct net_device *dev, return ret; } +/* Translate the Firmware FEC value into the ethtool value. */ +static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec) +{ + unsigned int eth_fec = 0; + + if (fw_fec & FW_PORT_CAP_FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (fw_fec & FW_PORT_CAP_FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +/* Translate Common Code FEC value into ethtool value. */ +static inline unsigned int cc_to_eth_fec(unsigned int cc_fec) +{ + unsigned int eth_fec = 0; + + if (cc_fec & FEC_AUTO) + eth_fec |= ETHTOOL_FEC_AUTO; + if (cc_fec & FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (cc_fec & FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +/* Translate ethtool FEC value into Common Code value. */ +static inline unsigned int eth_to_cc_fec(unsigned int eth_fec) +{ + unsigned int cc_fec = 0; + + if (eth_fec & ETHTOOL_FEC_OFF) + return cc_fec; + + if (eth_fec & ETHTOOL_FEC_AUTO) + cc_fec |= FEC_AUTO; + if (eth_fec & ETHTOOL_FEC_RS) + cc_fec |= FEC_RS; + if (eth_fec & ETHTOOL_FEC_BASER) + cc_fec |= FEC_BASER_RS; + + return cc_fec; +} + +static int get_fecparam(struct net_device *dev, struct ethtool_fecparam *fec) +{ + const struct port_info *pi = netdev_priv(dev); + const struct link_config *lc = &pi->link_cfg; + + /* Translate the Firmware FEC Support into the ethtool value. We + * always support IEEE 802.3 "automatic" selection of Link FEC type if + * any FEC is supported. + */ + fec->fec = fwcap_to_eth_fec(lc->supported); + if (fec->fec != ETHTOOL_FEC_OFF) + fec->fec |= ETHTOOL_FEC_AUTO; + + /* Translate the current internal FEC parameters into the + * ethtool values. + */ + fec->active_fec = cc_to_eth_fec(lc->fec); + + return 0; +} + +static int set_fecparam(struct net_device *dev, struct ethtool_fecparam *fec) +{ + struct port_info *pi = netdev_priv(dev); + struct link_config *lc = &pi->link_cfg; + struct link_config old_lc; + int ret; + + /* Save old Link Configuration in case the L1 Configure below + * fails. + */ + old_lc = *lc; + + /* Try to perform the L1 Configure and return the result of that + * effort. If it fails, revert the attempted change. + */ + lc->requested_fec = eth_to_cc_fec(fec->fec); + ret = t4_link_l1cfg(pi->adapter, pi->adapter->mbox, + pi->tx_chan, lc); + if (ret) + *lc = old_lc; + return ret; +} + static void get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { @@ -1255,6 +1353,8 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, static const struct ethtool_ops cxgb_ethtool_ops = { .get_link_ksettings = get_link_ksettings, .set_link_ksettings = set_link_ksettings, + .get_fecparam = get_fecparam, + .set_fecparam = set_fecparam, .get_drvinfo = get_drvinfo, .get_msglevel = get_msglevel, .set_msglevel = set_msglevel, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e403fa18f1b1..fdf220aa08d6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3610,11 +3610,8 @@ static int adap_init0(struct adapter *adap) * later reporting and B. to warn if the currently loaded firmware * is excessively mismatched relative to the driver.) */ - t4_get_fw_version(adap, &adap->params.fw_vers); - t4_get_bs_version(adap, &adap->params.bs_vers); - t4_get_tp_version(adap, &adap->params.tp_vers); - t4_get_exprom_version(adap, &adap->params.er_vers); + t4_get_version_info(adap); ret = t4_check_fw_version(adap); /* If firmware is too old (not supported by driver) force an update. */ if (ret) @@ -4560,56 +4557,8 @@ static void cxgb4_check_pcie_caps(struct adapter *adap) /* Dump basic information about the adapter */ static void print_adapter_info(struct adapter *adapter) { - /* Device information */ - dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n", - adapter->params.vpd.id, - CHELSIO_CHIP_RELEASE(adapter->params.chip)); - dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n", - adapter->params.vpd.sn, adapter->params.vpd.pn); - - /* Firmware Version */ - if (!adapter->params.fw_vers) - dev_warn(adapter->pdev_dev, "No firmware loaded\n"); - else - dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers)); - - /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap - * Firmware, so dev_info() is more appropriate here.) - */ - if (!adapter->params.bs_vers) - dev_info(adapter->pdev_dev, "No bootstrap loaded\n"); - else - dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers)); - - /* TP Microcode Version */ - if (!adapter->params.tp_vers) - dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n"); - else - dev_info(adapter->pdev_dev, - "TP Microcode version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers)); - - /* Expansion ROM version */ - if (!adapter->params.er_vers) - dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n"); - else - dev_info(adapter->pdev_dev, - "Expansion ROM version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers)); + /* Hardware/Firmware/etc. Version/Revision IDs */ + t4_dump_version_info(adapter); /* Software/Hardware configuration */ dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n", diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 82bf7aac6cdb..24087c886974 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0xd010, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0xea7c, - 0xf000, 0x11190, + 0xf000, 0x11110, + 0x11118, 0x11190, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x190e4, @@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030, - 0x30038, 0x30038, - 0x30040, 0x30040, 0x30100, 0x30144, 0x30190, 0x301a0, 0x301a8, 0x301b8, @@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x33c3c, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, - 0x34038, 0x34038, - 0x34040, 0x34040, 0x34100, 0x34144, 0x34190, 0x341a0, 0x341a8, 0x341b8, @@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x37c3c, 0x37c50, 0x37cf0, 0x37cfc, 0x38000, 0x38030, - 0x38038, 0x38038, - 0x38040, 0x38040, 0x38100, 0x38144, 0x38190, 0x381a0, 0x381a8, 0x381b8, @@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x3bc3c, 0x3bc50, 0x3bcf0, 0x3bcfc, 0x3c000, 0x3c030, - 0x3c038, 0x3c038, - 0x3c040, 0x3c040, 0x3c100, 0x3c144, 0x3c190, 0x3c1a0, 0x3c1a8, 0x3c1b8, @@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1190, 0x1194, 0x11a0, 0x11a4, 0x11b0, 0x11b4, - 0x11fc, 0x1258, - 0x1280, 0x12d4, - 0x12d9, 0x12d9, - 0x12de, 0x12de, - 0x12e3, 0x12e3, - 0x12e8, 0x133c, + 0x11fc, 0x1274, + 0x1280, 0x133c, 0x1800, 0x18fc, 0x3000, 0x302c, 0x3060, 0x30b0, @@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x5ea0, 0x5eb0, 0x5ec0, 0x5ec0, 0x5ec8, 0x5ed0, + 0x5ee0, 0x5ee0, + 0x5ef0, 0x5ef0, + 0x5f00, 0x5f00, 0x6000, 0x6020, 0x6028, 0x6040, 0x6058, 0x609c, @@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0xd300, 0xd31c, 0xdfc0, 0xdfe0, 0xe000, 0xf008, + 0xf010, 0xf018, + 0xf020, 0xf028, 0x11000, 0x11014, 0x11048, 0x1106c, 0x11074, 0x11088, @@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030, - 0x30038, 0x30038, - 0x30040, 0x30040, - 0x30048, 0x30048, - 0x30050, 0x30050, - 0x3005c, 0x30060, - 0x30068, 0x30068, - 0x30070, 0x30070, 0x30100, 0x30168, 0x30190, 0x301a0, 0x301a8, 0x301b8, @@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x326a8, 0x326a8, 0x326ec, 0x326ec, 0x32a00, 0x32abc, - 0x32b00, 0x32b38, + 0x32b00, 0x32b18, + 0x32b20, 0x32b38, 0x32b40, 0x32b58, 0x32b60, 0x32b78, 0x32c00, 0x32c00, 0x32c08, 0x32c3c, - 0x32e00, 0x32e2c, - 0x32f00, 0x32f2c, 0x33000, 0x3302c, 0x33034, 0x33050, 0x33058, 0x33058, @@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x33c38, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, - 0x34038, 0x34038, - 0x34040, 0x34040, - 0x34048, 0x34048, - 0x34050, 0x34050, - 0x3405c, 0x34060, - 0x34068, 0x34068, - 0x34070, 0x34070, 0x34100, 0x34168, 0x34190, 0x341a0, 0x341a8, 0x341b8, @@ -2465,13 +2444,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x366a8, 0x366a8, 0x366ec, 0x366ec, 0x36a00, 0x36abc, - 0x36b00, 0x36b38, + 0x36b00, 0x36b18, + 0x36b20, 0x36b38, 0x36b40, 0x36b58, 0x36b60, 0x36b78, 0x36c00, 0x36c00, 0x36c08, 0x36c3c, - 0x36e00, 0x36e2c, - 0x36f00, 0x36f2c, 0x37000, 0x3702c, 0x37034, 0x37050, 0x37058, 0x37058, @@ -2545,8 +2523,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x40280, 0x40280, 0x40304, 0x40304, 0x40330, 0x4033c, - 0x41304, 0x413b8, - 0x413c0, 0x413c8, + 0x41304, 0x413c8, 0x413d0, 0x413dc, 0x413f0, 0x413f0, 0x41400, 0x4140c, @@ -3100,6 +3077,179 @@ int t4_get_exprom_version(struct adapter *adap, u32 *vers) } /** + * t4_get_vpd_version - return the VPD version + * @adapter: the adapter + * @vers: where to place the version + * + * Reads the VPD via the Firmware interface (thus this can only be called + * once we're ready to issue Firmware commands). The format of the + * VPD version is adapter specific. Returns 0 on success, an error on + * failure. + * + * Note that early versions of the Firmware didn't include the ability + * to retrieve the VPD version, so we zero-out the return-value parameter + * in that case to avoid leaving it with garbage in it. + * + * Also note that the Firmware will return its cached copy of the VPD + * Revision ID, not the actual Revision ID as written in the Serial + * EEPROM. This is only an issue if a new VPD has been written and the + * Firmware/Chip haven't yet gone through a RESET sequence. So it's best + * to defer calling this routine till after a FW_RESET_CMD has been issued + * if the Host Driver will be performing a full adapter initialization. + */ +int t4_get_vpd_version(struct adapter *adapter, u32 *vers) +{ + u32 vpdrev_param; + int ret; + + vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV)); + ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &vpdrev_param, vers); + if (ret) + *vers = 0; + return ret; +} + +/** + * t4_get_scfg_version - return the Serial Configuration version + * @adapter: the adapter + * @vers: where to place the version + * + * Reads the Serial Configuration Version via the Firmware interface + * (thus this can only be called once we're ready to issue Firmware + * commands). The format of the Serial Configuration version is + * adapter specific. Returns 0 on success, an error on failure. + * + * Note that early versions of the Firmware didn't include the ability + * to retrieve the Serial Configuration version, so we zero-out the + * return-value parameter in that case to avoid leaving it with + * garbage in it. + * + * Also note that the Firmware will return its cached copy of the Serial + * Initialization Revision ID, not the actual Revision ID as written in + * the Serial EEPROM. This is only an issue if a new VPD has been written + * and the Firmware/Chip haven't yet gone through a RESET sequence. So + * it's best to defer calling this routine till after a FW_RESET_CMD has + * been issued if the Host Driver will be performing a full adapter + * initialization. + */ +int t4_get_scfg_version(struct adapter *adapter, u32 *vers) +{ + u32 scfgrev_param; + int ret; + + scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV)); + ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &scfgrev_param, vers); + if (ret) + *vers = 0; + return ret; +} + +/** + * t4_get_version_info - extract various chip/firmware version information + * @adapter: the adapter + * + * Reads various chip/firmware version numbers and stores them into the + * adapter Adapter Parameters structure. If any of the efforts fails + * the first failure will be returned, but all of the version numbers + * will be read. + */ +int t4_get_version_info(struct adapter *adapter) +{ + int ret = 0; + + #define FIRST_RET(__getvinfo) \ + do { \ + int __ret = __getvinfo; \ + if (__ret && !ret) \ + ret = __ret; \ + } while (0) + + FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers)); + FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers)); + FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers)); + FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers)); + FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers)); + FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers)); + + #undef FIRST_RET + return ret; +} + +/** + * t4_dump_version_info - dump all of the adapter configuration IDs + * @adapter: the adapter + * + * Dumps all of the various bits of adapter configuration version/revision + * IDs information. This is typically called at some point after + * t4_get_version_info() has been called. + */ +void t4_dump_version_info(struct adapter *adapter) +{ + /* Device information */ + dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n", + adapter->params.vpd.id, + CHELSIO_CHIP_RELEASE(adapter->params.chip)); + dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n", + adapter->params.vpd.sn, adapter->params.vpd.pn); + + /* Firmware Version */ + if (!adapter->params.fw_vers) + dev_warn(adapter->pdev_dev, "No firmware loaded\n"); + else + dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers)); + + /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap + * Firmware, so dev_info() is more appropriate here.) + */ + if (!adapter->params.bs_vers) + dev_info(adapter->pdev_dev, "No bootstrap loaded\n"); + else + dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers)); + + /* TP Microcode Version */ + if (!adapter->params.tp_vers) + dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n"); + else + dev_info(adapter->pdev_dev, + "TP Microcode version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers)); + + /* Expansion ROM version */ + if (!adapter->params.er_vers) + dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n"); + else + dev_info(adapter->pdev_dev, + "Expansion ROM version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers)); + + /* Serial Configuration version */ + dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n", + adapter->params.scfg_vers); + + /* VPD Version */ + dev_info(adapter->pdev_dev, "VPD version: %#x\n", + adapter->params.vpd_vers); +} + +/** * t4_check_fw_version - check if the FW is supported with this driver * @adap: the adapter * @@ -3690,11 +3840,64 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf) FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ FW_PORT_CAP_ANEG) +/* Translate Firmware Port Capabilities Pause specification to Common Code */ +static inline unsigned int fwcap_to_cc_pause(unsigned int fw_pause) +{ + unsigned int cc_pause = 0; + + if (fw_pause & FW_PORT_CAP_FC_RX) + cc_pause |= PAUSE_RX; + if (fw_pause & FW_PORT_CAP_FC_TX) + cc_pause |= PAUSE_TX; + + return cc_pause; +} + +/* Translate Common Code Pause specification into Firmware Port Capabilities */ +static inline unsigned int cc_to_fwcap_pause(unsigned int cc_pause) +{ + unsigned int fw_pause = 0; + + if (cc_pause & PAUSE_RX) + fw_pause |= FW_PORT_CAP_FC_RX; + if (cc_pause & PAUSE_TX) + fw_pause |= FW_PORT_CAP_FC_TX; + + return fw_pause; +} + +/* Translate Firmware Forward Error Correction specification to Common Code */ +static inline unsigned int fwcap_to_cc_fec(unsigned int fw_fec) +{ + unsigned int cc_fec = 0; + + if (fw_fec & FW_PORT_CAP_FEC_RS) + cc_fec |= FEC_RS; + if (fw_fec & FW_PORT_CAP_FEC_BASER_RS) + cc_fec |= FEC_BASER_RS; + + return cc_fec; +} + +/* Translate Common Code Forward Error Correction specification to Firmware */ +static inline unsigned int cc_to_fwcap_fec(unsigned int cc_fec) +{ + unsigned int fw_fec = 0; + + if (cc_fec & FEC_RS) + fw_fec |= FW_PORT_CAP_FEC_RS; + if (cc_fec & FEC_BASER_RS) + fw_fec |= FW_PORT_CAP_FEC_BASER_RS; + + return fw_fec; +} + /** * t4_link_l1cfg - apply link configuration to MAC/PHY - * @phy: the PHY to setup - * @mac: the MAC to setup - * @lc: the requested link configuration + * @adapter: the adapter + * @mbox: the Firmware Mailbox to use + * @port: the Port ID + * @lc: the Port's Link Configuration * * Set up a port's MAC and PHY according to a desired link configuration. * - If the PHY can auto-negotiate first decide what to advertise, then @@ -3707,22 +3910,46 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc) { struct fw_port_cmd c; - unsigned int mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO); - unsigned int fc = 0, fec = 0, fw_fec = 0; + unsigned int fw_mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO); + unsigned int fw_fc, cc_fec, fw_fec; + unsigned int rcap; lc->link_ok = 0; - if (lc->requested_fc & PAUSE_RX) - fc |= FW_PORT_CAP_FC_RX; - if (lc->requested_fc & PAUSE_TX) - fc |= FW_PORT_CAP_FC_TX; - fec = lc->requested_fec & FEC_AUTO ? lc->auto_fec : lc->requested_fec; + /* Convert driver coding of Pause Frame Flow Control settings into the + * Firmware's API. + */ + fw_fc = cc_to_fwcap_pause(lc->requested_fc); + + /* Convert Common Code Forward Error Control settings into the + * Firmware's API. If the current Requested FEC has "Automatic" + * (IEEE 802.3) specified, then we use whatever the Firmware + * sent us as part of it's IEEE 802.3-based interpratation of + * the Transceiver Module EPROM FEC parameters. Otherwise we + * use whatever is in the current Requested FEC settings. + */ + if (lc->requested_fec & FEC_AUTO) + cc_fec = lc->auto_fec; + else + cc_fec = lc->requested_fec; + fw_fec = cc_to_fwcap_fec(cc_fec); - if (fec & FEC_RS) - fw_fec |= FW_PORT_CAP_FEC_RS; - if (fec & FEC_BASER_RS) - fw_fec |= FW_PORT_CAP_FEC_BASER_RS; + /* Figure out what our Requested Port Capabilities are going to be. + */ + if (!(lc->supported & FW_PORT_CAP_ANEG)) { + rcap = (lc->supported & ADVERT_MASK) | fw_fc | fw_fec; + lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); + lc->fec = cc_fec; + } else if (lc->autoneg == AUTONEG_DISABLE) { + rcap = lc->requested_speed | fw_fc | fw_fec | fw_mdi; + lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); + lc->fec = cc_fec; + } else { + rcap = lc->advertising | fw_fc | fw_fec | fw_mdi; + } + /* And send that on to the Firmware ... + */ memset(&c, 0, sizeof(c)); c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) | FW_CMD_REQUEST_F | FW_CMD_EXEC_F | @@ -3730,19 +3957,7 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, c.action_to_len16 = cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) | FW_LEN16(c)); - - if (!(lc->supported & FW_PORT_CAP_ANEG)) { - c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) | - fc | fw_fec); - lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); - } else if (lc->autoneg == AUTONEG_DISABLE) { - c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc | - fw_fec | mdi); - lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); - } else - c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc | - fw_fec | mdi); - + c.u.l1cfg.rcap = cpu_to_be32(rcap); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -7480,19 +7695,28 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc) void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) { const struct fw_port_cmd *p = (const void *)rpl; + unsigned int acaps = be16_to_cpu(p->u.info.acap); struct adapter *adap = pi->adapter; /* link/module state change message */ - int speed = 0, fc = 0; + int speed = 0, fc, fec; struct link_config *lc; u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype); int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0; u32 mod = FW_PORT_CMD_MODTYPE_G(stat); + /* Unfortunately the format of the Link Status returned by the + * Firmware isn't the same as the Firmware Port Capabilities bitfield + * used everywhere else ... + */ + fc = 0; if (stat & FW_PORT_CMD_RXPAUSE_F) fc |= PAUSE_RX; if (stat & FW_PORT_CMD_TXPAUSE_F) fc |= PAUSE_TX; + + fec = fwcap_to_cc_fec(acaps); + if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M)) speed = 100; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G)) @@ -7509,11 +7733,20 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) lc = &pi->link_cfg; if (mod != pi->mod_type) { + /* When a new Transceiver Module is inserted, the Firmware + * will examine any Forward Error Correction parameters + * present in the Transceiver Module i2c EPROM and determine + * the supported and recommended FEC settings from those + * based on IEEE 802.3 standards. We always record the + * IEEE 802.3 recommended "automatic" settings. + */ + lc->auto_fec = fec; + pi->mod_type = mod; t4_os_portmod_changed(adap, pi->port_id); } if (link_ok != lc->link_ok || speed != lc->speed || - fc != lc->fc) { /* something changed */ + fc != lc->fc || fec != lc->fec) { /* something changed */ if (!link_ok && lc->link_ok) { unsigned char rc = FW_PORT_CMD_LINKDNRC_G(stat); @@ -7525,6 +7758,8 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) lc->link_ok = link_ok; lc->speed = speed; lc->fc = fc; + lc->fec = fec; + lc->supported = be16_to_cpu(p->u.info.pcap); lc->lp_advertising = be16_to_cpu(p->u.info.lpacap); @@ -7614,7 +7849,8 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p) /** * init_link_config - initialize a link's SW state * @lc: structure holding the link state - * @caps: link capabilities + * @pcaps: link Port Capabilities + * @acaps: link current Advertised Port Capabilities * * Initializes the SW state maintained for each link, including the link's * capabilities and default speed/flow-control/autonegotiation settings. @@ -7627,15 +7863,11 @@ static void init_link_config(struct link_config *lc, unsigned int pcaps, lc->requested_speed = 0; lc->speed = 0; lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX; - lc->auto_fec = 0; /* For Forward Error Control, we default to whatever the Firmware * tells us the Link is currently advertising. */ - if (acaps & FW_PORT_CAP_FEC_RS) - lc->auto_fec |= FEC_RS; - if (acaps & FW_PORT_CAP_FEC_BASER_RS) - lc->auto_fec |= FEC_BASER_RS; + lc->auto_fec = fwcap_to_cc_fec(acaps); lc->requested_fec = FEC_AUTO; lc->fec = lc->auto_fec; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0ebed64d62d3..ad825fbc21a5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1124,6 +1124,8 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, FW_PARAMS_PARAM_DEV_FWCACHE = 0x18, + FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A, + FW_PARAMS_PARAM_DEV_VPDREV = 0x1B, FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, }; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 17e566a8b345..84394b43c0a1 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1303,7 +1303,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 0x00, 'L', 'i', 'n', 'u', 'x' }; static int last_irq; - static int multiport_cnt; /* For four-port boards w/one EEPROM */ int i, irq; unsigned short sum; unsigned char *ee_data; @@ -1557,7 +1556,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else if (ee_data[0] == 0xff && ee_data[1] == 0xff && ee_data[2] == 0) { sa_offset = 2; /* Grrr, damn Matrox boards. */ - multiport_cnt = 4; } #ifdef CONFIG_MIPS_COBALT if ((pdev->bus->number == 0) && diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index 4ee042c034a1..1b79a6defd56 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -73,7 +73,7 @@ #define ETHERCAT_MASTER_ID 0x14 -static struct pci_device_id ids[] = { +static const struct pci_device_id ids[] = { { PCI_DEVICE(0x15ec, 0x5000), }, { 0, } }; diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 2b62841c4c63..05989aafaf32 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -139,10 +139,7 @@ int be_roce_register_driver(struct ocrdma_driver *drv) } ocrdma_drv = drv; list_for_each_entry(dev, &be_adapter_list, entry) { - struct net_device *netdev; - _be_roce_dev_add(dev); - netdev = dev->netdev; } mutex_unlock(&be_adapter_list_lock); return 0; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 757b873735a5..550ea1ec7b6c 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -398,8 +398,8 @@ static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev) of_dev = of_find_device_by_node(mac_node); if (!of_dev) { - dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n", - mac_node->full_name); + dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n", + mac_node); of_node_put(mac_node); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a6e323f15637..df09b254553d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -173,10 +173,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #endif /* CONFIG_M5272 */ /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets. + * + * 2048 byte skbufs are allocated. However, alignment requirements + * varies between FEC variants. Worst case is 64, so round down by 64. */ -#define PKT_MAXBUF_SIZE 1522 +#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64)) #define PKT_MINBUF_SIZE 64 -#define PKT_MAXBLR_SIZE 1536 /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) @@ -851,7 +853,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); - writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) @@ -1904,8 +1906,10 @@ static int fec_enet_mii_probe(struct net_device *ndev) phy_dev = of_phy_connect(ndev, fep->phy_node, &fec_enet_adjust_link, 0, fep->phy_interface); - if (!phy_dev) + if (!phy_dev) { + netdev_err(ndev, "Unable to connect to phy\n"); return -ENODEV; + } } else { /* check for attached phy */ for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index aa8cf5d2a53c..6d7269d87a85 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -960,8 +960,8 @@ static int mpc52xx_fec_probe(struct platform_device *op) /* We're done ! */ platform_set_drvdata(op, ndev); - netdev_info(ndev, "%s MAC %pM\n", - op->dev.of_node->full_name, ndev->dev_addr); + netdev_info(ndev, "%pOF MAC %pM\n", + op->dev.of_node, ndev->dev_addr); return 0; diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 4aefe2438969..e714b8fa55eb 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1925,8 +1925,8 @@ static int fman_reset(struct fman *fman) guts_regs = of_iomap(guts_node, 0); if (!guts_regs) { - dev_err(fman->dev, "%s: Couldn't map %s regs\n", - __func__, guts_node->full_name); + dev_err(fman->dev, "%s: Couldn't map %pOF regs\n", + __func__, guts_node); goto guts_regs; } #define FMAN1_ALL_MACS_MASK 0xFCC00000 @@ -2780,8 +2780,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev) err = of_property_read_u32(fm_node, "cell-index", &val); if (err) { - dev_err(&of_dev->dev, "%s: failed to read cell-index for %s\n", - __func__, fm_node->full_name); + dev_err(&of_dev->dev, "%s: failed to read cell-index for %pOF\n", + __func__, fm_node); goto fman_node_put; } fman->dts_params.id = (u8)val; @@ -2834,8 +2834,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev) err = of_property_read_u32_array(fm_node, "fsl,qman-channel-range", &range[0], 2); if (err) { - dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %s\n", - __func__, fm_node->full_name); + dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %pOF\n", + __func__, fm_node); goto fman_node_put; } fman->dts_params.qman_channel_base = range[0]; diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 57bf44fa16a1..49bfa11f2d20 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1720,8 +1720,8 @@ static int fman_port_probe(struct platform_device *of_dev) err = of_property_read_u32(port_node, "cell-index", &val); if (err) { - dev_err(port->dev, "%s: reading cell-index for %s failed\n", - __func__, port_node->full_name); + dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", + __func__, port_node); err = -EINVAL; goto return_err; } diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 6e67d22fd0d5..14cd2c8b0024 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -698,8 +698,8 @@ static int mac_probe(struct platform_device *_of_dev) priv->internal_phy_node = of_parse_phandle(mac_node, "pcsphy-handle", 0); } else { - dev_err(dev, "MAC node (%s) contains unsupported MAC\n", - mac_node->full_name); + dev_err(dev, "MAC node (%pOF) contains unsupported MAC\n", + mac_node); err = -EINVAL; goto _return; } @@ -712,16 +712,15 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the FM node */ dev_node = of_get_parent(mac_node); if (!dev_node) { - dev_err(dev, "of_get_parent(%s) failed\n", - mac_node->full_name); + dev_err(dev, "of_get_parent(%pOF) failed\n", + mac_node); err = -EINVAL; goto _return_dev_set_drvdata; } of_dev = of_find_device_by_node(dev_node); if (!of_dev) { - dev_err(dev, "of_find_device_by_node(%s) failed\n", - dev_node->full_name); + dev_err(dev, "of_find_device_by_node(%pOF) failed\n", dev_node); err = -EINVAL; goto _return_of_node_put; } @@ -729,8 +728,7 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the FMan cell-index */ err = of_property_read_u32(dev_node, "cell-index", &val); if (err) { - dev_err(dev, "failed to read cell-index for %s\n", - dev_node->full_name); + dev_err(dev, "failed to read cell-index for %pOF\n", dev_node); err = -EINVAL; goto _return_of_node_put; } @@ -739,7 +737,7 @@ static int mac_probe(struct platform_device *_of_dev) priv->fman = fman_bind(&of_dev->dev); if (!priv->fman) { - dev_err(dev, "fman_bind(%s) failed\n", dev_node->full_name); + dev_err(dev, "fman_bind(%pOF) failed\n", dev_node); err = -ENODEV; goto _return_of_node_put; } @@ -749,8 +747,8 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the address of the memory mapped registers */ err = of_address_to_resource(mac_node, 0, &res); if (err < 0) { - dev_err(dev, "of_address_to_resource(%s) = %d\n", - mac_node->full_name, err); + dev_err(dev, "of_address_to_resource(%pOF) = %d\n", + mac_node, err); goto _return_dev_set_drvdata; } @@ -784,8 +782,7 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the cell-index */ err = of_property_read_u32(mac_node, "cell-index", &val); if (err) { - dev_err(dev, "failed to read cell-index for %s\n", - mac_node->full_name); + dev_err(dev, "failed to read cell-index for %pOF\n", mac_node); err = -EINVAL; goto _return_dev_set_drvdata; } @@ -794,8 +791,7 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the MAC address */ mac_addr = of_get_mac_address(mac_node); if (!mac_addr) { - dev_err(dev, "of_get_mac_address(%s) failed\n", - mac_node->full_name); + dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node); err = -EINVAL; goto _return_dev_set_drvdata; } @@ -804,15 +800,15 @@ static int mac_probe(struct platform_device *_of_dev) /* Get the port handles */ nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL); if (unlikely(nph < 0)) { - dev_err(dev, "of_count_phandle_with_args(%s, fsl,fman-ports) failed\n", - mac_node->full_name); + dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n", + mac_node); err = nph; goto _return_dev_set_drvdata; } if (nph != ARRAY_SIZE(mac_dev->port)) { - dev_err(dev, "Not supported number of fman-ports handles of mac node %s from device tree\n", - mac_node->full_name); + dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n", + mac_node); err = -EINVAL; goto _return_dev_set_drvdata; } @@ -821,24 +817,24 @@ static int mac_probe(struct platform_device *_of_dev) /* Find the port node */ dev_node = of_parse_phandle(mac_node, "fsl,fman-ports", i); if (!dev_node) { - dev_err(dev, "of_parse_phandle(%s, fsl,fman-ports) failed\n", - mac_node->full_name); + dev_err(dev, "of_parse_phandle(%pOF, fsl,fman-ports) failed\n", + mac_node); err = -EINVAL; goto _return_of_node_put; } of_dev = of_find_device_by_node(dev_node); if (!of_dev) { - dev_err(dev, "of_find_device_by_node(%s) failed\n", - dev_node->full_name); + dev_err(dev, "of_find_device_by_node(%pOF) failed\n", + dev_node); err = -EINVAL; goto _return_of_node_put; } mac_dev->port[i] = fman_port_bind(&of_dev->dev); if (!mac_dev->port[i]) { - dev_err(dev, "dev_get_drvdata(%s) failed\n", - dev_node->full_name); + dev_err(dev, "dev_get_drvdata(%pOF) failed\n", + dev_node); err = -EINVAL; goto _return_of_node_put; } @@ -849,8 +845,8 @@ static int mac_probe(struct platform_device *_of_dev) phy_if = of_get_phy_mode(mac_node); if (phy_if < 0) { dev_warn(dev, - "of_get_phy_mode() for %s failed. Defaulting to SGMII\n", - mac_node->full_name); + "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n", + mac_node); phy_if = PHY_INTERFACE_MODE_SGMII; } priv->phy_if = phy_if; diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index a10de1e9c157..80ad16acf0f1 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -267,8 +267,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) ret = of_address_to_resource(np, 0, &res); if (ret < 0) { - pr_debug("fsl-pq-mdio: no address range in node %s\n", - np->full_name); + pr_debug("fsl-pq-mdio: no address range in node %pOF\n", + np); continue; } @@ -280,8 +280,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) if (!iprop) { iprop = of_get_property(np, "device-id", NULL); if (!iprop) { - pr_debug("fsl-pq-mdio: no UCC ID in node %s\n", - np->full_name); + pr_debug("fsl-pq-mdio: no UCC ID in node %pOF\n", + np); continue; } } @@ -293,8 +293,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) * numbered from 1, not 0. */ if (ucc_set_qe_mux_mii_mng(id - 1) < 0) { - pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n", - np->full_name); + pr_debug("fsl-pq-mdio: invalid UCC ID in node %pOF\n", + np); continue; } @@ -442,8 +442,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) if (data->get_tbipa) { for_each_child_of_node(np, tbi) { if (strcmp(tbi->type, "tbi-phy") == 0) { - dev_dbg(&pdev->dev, "found TBI PHY node %s\n", - strrchr(tbi->full_name, '/') + 1); + dev_dbg(&pdev->dev, "found TBI PHY node %pOFP\n", + tbi); break; } } @@ -454,8 +454,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) if (!prop) { dev_err(&pdev->dev, - "missing 'reg' property in node %s\n", - tbi->full_name); + "missing 'reg' property in node %pOF\n", + tbi); err = -EBUSY; goto error; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a8db27e86a11..78cb20c67aa6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -595,7 +595,7 @@ static void hns_nic_self_test(struct net_device *ndev, set_bit(NIC_STATE_TESTING, &priv->state); if (if_running) - (void)dev_close(ndev); + dev_close(ndev); for (i = 0; i < SELF_TEST_TPYE_NUM; i++) { if (!st_param[i][1]) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index b9d310f20bcc..4878b7169e0f 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3102,8 +3102,7 @@ static int ehea_setup_ports(struct ehea_adapter *adapter) dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (!dn_log_port_id) { - pr_err("bad device node: eth_dn name=%s\n", - eth_dn->full_name); + pr_err("bad device node: eth_dn name=%pOF\n", eth_dn); continue; } @@ -3425,7 +3424,7 @@ static int ehea_probe_adapter(struct platform_device *dev) if (!adapter->handle) { dev_err(&dev->dev, "failed getting handle for adapter" - " '%s'\n", dev->dev.of_node->full_name); + " '%pOF'\n", dev->dev.of_node); ret = -ENODEV; goto out_free_ad; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 259e69a52ec5..95135d20458f 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -133,8 +133,7 @@ static inline void emac_report_timeout_error(struct emac_instance *dev, EMAC_FTR_440EP_PHY_CLK_FIX)) DBG(dev, "%s" NL, error); else if (net_ratelimit()) - printk(KERN_ERR "%s: %s\n", dev->ofdev->dev.of_node->full_name, - error); + printk(KERN_ERR "%pOF: %s\n", dev->ofdev->dev.of_node, error); } /* EMAC PHY clock workaround: @@ -2258,8 +2257,8 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev, strlcpy(info->driver, "ibm_emac", sizeof(info->driver)); strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %s", - dev->cell_index, dev->ofdev->dev.of_node->full_name); + snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF", + dev->cell_index, dev->ofdev->dev.of_node); } static const struct ethtool_ops emac_ethtool_ops = { @@ -2431,8 +2430,8 @@ static int emac_read_uint_prop(struct device_node *np, const char *name, const u32 *prop = of_get_property(np, name, &len); if (prop == NULL || len < sizeof(u32)) { if (fatal) - printk(KERN_ERR "%s: missing %s property\n", - np->full_name, name); + printk(KERN_ERR "%pOF: missing %s property\n", + np, name); return -ENODEV; } *val = *prop; @@ -2768,7 +2767,7 @@ static int emac_init_phy(struct emac_instance *dev) #endif mutex_unlock(&emac_phy_map_lock); if (i == 0x20) { - printk(KERN_WARNING "%s: can't find PHY!\n", np->full_name); + printk(KERN_WARNING "%pOF: can't find PHY!\n", np); return -ENXIO; } @@ -2894,8 +2893,8 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x; #else - printk(KERN_ERR "%s: Flow control not disabled!\n", - np->full_name); + printk(KERN_ERR "%pOF: Flow control not disabled!\n", + np); return -ENXIO; #endif } @@ -2918,8 +2917,7 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_TAH dev->features |= EMAC_FTR_HAS_TAH; #else - printk(KERN_ERR "%s: TAH support not enabled !\n", - np->full_name); + printk(KERN_ERR "%pOF: TAH support not enabled !\n", np); return -ENXIO; #endif } @@ -2928,8 +2926,7 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_ZMII dev->features |= EMAC_FTR_HAS_ZMII; #else - printk(KERN_ERR "%s: ZMII support not enabled !\n", - np->full_name); + printk(KERN_ERR "%pOF: ZMII support not enabled !\n", np); return -ENXIO; #endif } @@ -2938,8 +2935,7 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_RGMII dev->features |= EMAC_FTR_HAS_RGMII; #else - printk(KERN_ERR "%s: RGMII support not enabled !\n", - np->full_name); + printk(KERN_ERR "%pOF: RGMII support not enabled !\n", np); return -ENXIO; #endif } @@ -2947,8 +2943,8 @@ static int emac_init_config(struct emac_instance *dev) /* Read MAC-address */ p = of_get_property(np, "local-mac-address", NULL); if (p == NULL) { - printk(KERN_ERR "%s: Can't find local-mac-address property\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't find local-mac-address property\n", + np); return -ENXIO; } memcpy(dev->ndev->dev_addr, p, ETH_ALEN); @@ -3043,23 +3039,21 @@ static int emac_probe(struct platform_device *ofdev) dev->emac_irq = irq_of_parse_and_map(np, 0); dev->wol_irq = irq_of_parse_and_map(np, 1); if (!dev->emac_irq) { - printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); + printk(KERN_ERR "%pOF: Can't map main interrupt\n", np); goto err_free; } ndev->irq = dev->emac_irq; /* Map EMAC regs */ if (of_address_to_resource(np, 0, &dev->rsrc_regs)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't get registers address\n", np); goto err_irq_unmap; } // TODO : request_mem_region dev->emacp = ioremap(dev->rsrc_regs.start, resource_size(&dev->rsrc_regs)); if (dev->emacp == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); err = -ENOMEM; goto err_irq_unmap; } @@ -3068,8 +3062,7 @@ static int emac_probe(struct platform_device *ofdev) err = emac_wait_deps(dev); if (err) { printk(KERN_ERR - "%s: Timeout waiting for dependent devices\n", - np->full_name); + "%pOF: Timeout waiting for dependent devices\n", np); /* display more info about what's missing ? */ goto err_reg_unmap; } @@ -3084,8 +3077,8 @@ static int emac_probe(struct platform_device *ofdev) dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan); err = mal_register_commac(dev->mal, &dev->commac); if (err) { - printk(KERN_ERR "%s: failed to register with mal %s!\n", - np->full_name, dev->mal_dev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: failed to register with mal %pOF!\n", + np, dev->mal_dev->dev.of_node); goto err_rel_deps; } dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); @@ -3161,8 +3154,8 @@ static int emac_probe(struct platform_device *ofdev) err = register_netdev(ndev); if (err) { - printk(KERN_ERR "%s: failed to register net device (%d)!\n", - np->full_name, err); + printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", + np, err); goto err_detach_tah; } @@ -3176,8 +3169,8 @@ static int emac_probe(struct platform_device *ofdev) wake_up_all(&emac_probe_wait); - printk(KERN_INFO "%s: EMAC-%d %s, MAC %pM\n", - ndev->name, dev->cell_index, np->full_name, ndev->dev_addr); + printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n", + ndev->name, dev->cell_index, np, ndev->dev_addr); if (dev->phy_mode == PHY_MODE_SGMII) printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name); diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h index 5bdfc174a07e..9d06d3be3161 100644 --- a/drivers/net/ethernet/ibm/emac/debug.h +++ b/drivers/net/ethernet/ibm/emac/debug.h @@ -31,7 +31,7 @@ #endif #define EMAC_DBG(d, name, fmt, arg...) \ - printk(KERN_DEBUG #name "%s: " fmt, d->ofdev->dev.of_node->full_name, ## arg) + printk(KERN_DEBUG #name "%pOF: " fmt, d->ofdev->dev.of_node, ## arg) #if DBG_LEVEL > 0 # define DBG(d,f,x...) EMAC_DBG(d, emac, f, ##x) diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 91b1a558f37d..2c74baa2398a 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -579,8 +579,8 @@ static int mal_probe(struct platform_device *ofdev) mal->features |= (MAL_FTR_CLEAR_ICINTSTAT | MAL_FTR_COMMON_ERR_INT); #else - printk(KERN_ERR "%s: Support for 405EZ not enabled!\n", - ofdev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n", + ofdev->dev.of_node); err = -ENODEV; goto fail; #endif @@ -687,8 +687,8 @@ static int mal_probe(struct platform_device *ofdev) mal_enable_eob_irq(mal); printk(KERN_INFO - "MAL v%d %s, %d TX channels, %d RX channels\n", - mal->version, ofdev->dev.of_node->full_name, + "MAL v%d %pOF, %d TX channels, %d RX channels\n", + mal->version, ofdev->dev.of_node, mal->num_tx_chans, mal->num_rx_chans); /* Advertise this instance to the rest of the world */ diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c index 206ccbbae7bb..c4a1ac38bba8 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.c +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -104,8 +104,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode) /* Check if we need to attach to a RGMII */ if (input < 0 || !rgmii_valid_mode(mode)) { - printk(KERN_ERR "%s: unsupported settings !\n", - ofdev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: unsupported settings !\n", + ofdev->dev.of_node); return -ENODEV; } @@ -114,8 +114,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode) /* Enable this input */ out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input)); - printk(KERN_NOTICE "%s: input %d in %s mode\n", - ofdev->dev.of_node->full_name, input, rgmii_mode_name(mode)); + printk(KERN_NOTICE "%pOF: input %d in %s mode\n", + ofdev->dev.of_node, input, rgmii_mode_name(mode)); ++dev->users; @@ -249,8 +249,7 @@ static int rgmii_probe(struct platform_device *ofdev) rc = -ENXIO; if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't get registers address\n", np); goto err_free; } @@ -258,8 +257,7 @@ static int rgmii_probe(struct platform_device *ofdev) dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start, sizeof(struct rgmii_regs)); if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); goto err_free; } @@ -278,8 +276,8 @@ static int rgmii_probe(struct platform_device *ofdev) out_be32(&dev->base->fer, 0); printk(KERN_INFO - "RGMII %s initialized with%s MDIO support\n", - ofdev->dev.of_node->full_name, + "RGMII %pOF initialized with%s MDIO support\n", + ofdev->dev.of_node, (dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ? "" : "out"); wmb(); diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c index 32cb6c9007c5..9912456dca48 100644 --- a/drivers/net/ethernet/ibm/emac/tah.c +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -58,8 +58,7 @@ void tah_reset(struct platform_device *ofdev) --n; if (unlikely(!n)) - printk(KERN_ERR "%s: reset timeout\n", - ofdev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: reset timeout\n", ofdev->dev.of_node); /* 10KB TAH TX FIFO accommodates the max MTU of 9000 */ out_be32(&p->mr, @@ -105,8 +104,7 @@ static int tah_probe(struct platform_device *ofdev) rc = -ENXIO; if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't get registers address\n", np); goto err_free; } @@ -114,8 +112,7 @@ static int tah_probe(struct platform_device *ofdev) dev->base = (struct tah_regs __iomem *)ioremap(regs.start, sizeof(struct tah_regs)); if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); goto err_free; } @@ -124,8 +121,7 @@ static int tah_probe(struct platform_device *ofdev) /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ tah_reset(ofdev); - printk(KERN_INFO - "TAH %s initialized\n", ofdev->dev.of_node->full_name); + printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node); wmb(); return 0; diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c index 8727b865ea02..89c42d362292 100644 --- a/drivers/net/ethernet/ibm/emac/zmii.c +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -121,15 +121,15 @@ int zmii_attach(struct platform_device *ofdev, int input, int *mode) } else dev->mode = *mode; - printk(KERN_NOTICE "%s: bridge in %s mode\n", - ofdev->dev.of_node->full_name, + printk(KERN_NOTICE "%pOF: bridge in %s mode\n", + ofdev->dev.of_node, zmii_mode_name(dev->mode)); } else { /* All inputs must use the same mode */ if (*mode != PHY_MODE_NA && *mode != dev->mode) { printk(KERN_ERR - "%s: invalid mode %d specified for input %d\n", - ofdev->dev.of_node->full_name, *mode, input); + "%pOF: invalid mode %d specified for input %d\n", + ofdev->dev.of_node, *mode, input); mutex_unlock(&dev->lock); return -EINVAL; } @@ -250,8 +250,7 @@ static int zmii_probe(struct platform_device *ofdev) rc = -ENXIO; if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't get registers address\n", np); goto err_free; } @@ -259,8 +258,7 @@ static int zmii_probe(struct platform_device *ofdev) dev->base = (struct zmii_regs __iomem *)ioremap(regs.start, sizeof(struct zmii_regs)); if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); goto err_free; } @@ -270,8 +268,7 @@ static int zmii_probe(struct platform_device *ofdev) /* Disable all inputs by default */ out_be32(&dev->base->fer, 0); - printk(KERN_INFO - "ZMII %s initialized\n", ofdev->dev.of_node->full_name); + printk(KERN_INFO "ZMII %pOF initialized\n", ofdev->dev.of_node); wmb(); platform_set_drvdata(ofdev, dev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a3e694679635..9d8af464dc44 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3851,10 +3851,7 @@ static int ibmvnic_resume(struct device *dev) if (adapter->state != VNIC_OPEN) return 0; - /* kick the interrupt handlers just in case we lost an interrupt */ - for (i = 0; i < adapter->req_rx_queues; i++) - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, - adapter->rx_scrq[i]); + tasklet_schedule(&adapter->tasklet); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9692a5294fa3..1d29152256fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1091,7 +1091,7 @@ static void i40e_get_regs(struct net_device *netdev, struct ethtool_regs *regs, struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; u32 *reg_buf = p; - int i, j, ri; + unsigned int i, j, ri; u32 reg; /* Tell ethtool which driver-version-specific regs output we have. @@ -1550,9 +1550,9 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, struct i40e_ring *tx_ring, *rx_ring; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + unsigned int j; int i = 0; char *p; - int j; struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi); unsigned int start; @@ -1637,7 +1637,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; char *p = (char *)data; - int i; + unsigned int i; switch (stringset) { case ETH_SS_TEST: diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2db93d3f6d23..4104944ea367 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4773,7 +4773,7 @@ static void i40e_detect_recover_hung(struct i40e_pf *pf) { struct net_device *netdev; struct i40e_vsi *vsi; - int i; + unsigned int i; /* Only for LAN VSI */ vsi = pf->vsi[pf->lan_vsi]; @@ -7520,6 +7520,18 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } +static const char *i40e_tunnel_name(struct i40e_udp_port_config *port) +{ + switch (port->type) { + case UDP_TUNNEL_TYPE_VXLAN: + return "vxlan"; + case UDP_TUNNEL_TYPE_GENEVE: + return "geneve"; + default: + return "unknown"; + } +} + /** * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters * @pf: board private structure @@ -7565,14 +7577,14 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) ret = i40e_aq_del_udp_tunnel(hw, i, NULL); if (ret) { - dev_dbg(&pf->pdev->dev, - "%s %s port %d, index %d failed, err %s aq_err %s\n", - pf->udp_ports[i].type ? "vxlan" : "geneve", - port ? "add" : "delete", - port, i, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, - pf->hw.aq.asq_last_status)); + dev_info(&pf->pdev->dev, + "%s %s port %d, index %d failed, err %s aq_err %s\n", + i40e_tunnel_name(&pf->udp_ports[i]), + port ? "add" : "delete", + port, i, + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); pf->udp_ports[i].port = 0; } } @@ -9589,6 +9601,7 @@ static int i40e_xdp(struct net_device *dev, return i40e_xdp_setup(vsi, xdp->prog); case XDP_QUERY_PROG: xdp->prog_attached = i40e_enabled_xdp_vsi(vsi); + xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; return 0; default: return -EINVAL; @@ -12089,7 +12102,10 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); i40e_stop_misc_vector(pf); - + if (pf->msix_entries) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + } retval = pci_save_state(pdev); if (retval) return retval; @@ -12129,6 +12145,15 @@ static int i40e_resume(struct pci_dev *pdev) /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { clear_bit(__I40E_DOWN, pf->state); + if (pf->msix_entries) { + err = request_irq(pf->msix_entries[0].vector, + i40e_intr, 0, pf->int_name, pf); + if (err) { + dev_err(&pf->pdev->dev, + "request_irq for %s failed: %d\n", + pf->int_name, err); + } + } i40e_reset_and_rebuild(pf, false, false); } @@ -12168,12 +12193,14 @@ static int __init i40e_init_module(void) i40e_driver_string, i40e_driver_version_str); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); - /* we will see if single thread per module is enough for now, - * it can't be any worse than using the system workqueue which - * was already single threaded + /* There is no need to throttle the number of active tasks because + * each device limits its own task using a state bit for scheduling + * the service task, and the device tasks do not interfere with each + * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM + * since we need to be able to guarantee forward progress even under + * memory pressure. */ - i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, - i40e_driver_name); + i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 800bd55d0159..6fdecd70dcbc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -134,8 +134,25 @@ i40e_i40e_acquire_nvm_exit: **/ void i40e_release_nvm(struct i40e_hw *hw) { - if (!hw->nvm.blank_nvm_mode) - i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL); + i40e_status ret_code = I40E_SUCCESS; + u32 total_delay = 0; + + if (hw->nvm.blank_nvm_mode) + return; + + ret_code = i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL); + + /* there are some rare cases when trying to release the resource + * results in an admin Q timeout, so handle them correctly + */ + while ((ret_code == I40E_ERR_ADMIN_QUEUE_TIMEOUT) && + (total_delay < hw->aq.asq_cmd_timeout)) { + usleep_range(1000, 2000); + ret_code = i40e_aq_release_resource(hw, + I40E_NVM_RESOURCE_ID, + 0, NULL); + total_delay++; + } } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 1a0be835fa06..0129ed3b78ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -158,13 +158,12 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now, then; + struct timespec64 now; - then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now); - now = timespec64_add(now, then); + timespec64_add_ns(&now, delta); i40e_ptp_write(pf, (const struct timespec64 *)&now); mutex_unlock(&pf->tmreg_lock); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b936febc315a..d464fceb300f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -860,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); -#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this @@ -2063,7 +2063,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false, xdp_xmit = false; - while (likely(total_rx_packets < budget)) { + while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; struct xdp_buff xdp; @@ -2196,7 +2196,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_bytes += total_rx_bytes; /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : total_rx_packets; + return failure ? budget : (int)total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -2451,9 +2451,15 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, hlen = (hdr.network[0] & 0x0F) << 2; l4_proto = hdr.ipv4->protocol; } else { - hlen = hdr.network - skb->data; - l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); - hlen -= hdr.network - skb->data; + /* find the start of the innermost ipv6 header */ + unsigned int inner_hlen = hdr.network - skb->data; + unsigned int h_offset = inner_hlen; + + /* this function updates h_offset to the end of the header */ + l4_proto = + ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL); + /* hlen will contain our best estimate of the tcp header */ + hlen = h_offset - inner_hlen; } if (l4_proto != IPPROTO_TCP) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ecbe40ea8ffe..979110d59f67 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1567,7 +1567,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) dev_err(&pf->pdev->dev, "VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n", vf->vf_id); - ret = I40E_ERR_PARAM; + aq_ret = I40E_ERR_PARAM; goto err; } vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING; @@ -1741,16 +1741,14 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, NULL); } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { - aq_ret = 0; - if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) { - aq_ret = - i40e_aq_set_vsi_uc_promisc_on_vlan(hw, - vsi->seid, - alluni, - f->vlan, - NULL); - aq_err = pf->hw.aq.asq_last_status; - } + if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) + continue; + aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, + vsi->seid, + alluni, + f->vlan, + NULL); + aq_err = pf->hw.aq.asq_last_status; if (aq_ret) dev_err(&pf->pdev->dev, "Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n", @@ -2764,7 +2762,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) spin_unlock_bh(&vsi->mac_filter_hash_lock); - dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ if (i40e_sync_vsi_filters(vsi)) { dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); @@ -2772,7 +2769,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; } ether_addr_copy(vf->default_lan_addr.addr, mac); - vf->pf_set_mac = true; + + if (is_zero_ether_addr(mac)) { + vf->pf_set_mac = false; + dev_info(&pf->pdev->dev, "Removing MAC on VF %d\n", vf_id); + } else { + vf->pf_set_mac = true; + dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", + mac, vf_id); + } + /* Force the VF driver stop so it has to reload with new MAC address */ i40e_vc_disable_vf(pf, vf); dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h index 5e314fd3c016..a90737786c34 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h @@ -54,7 +54,7 @@ struct i40e_dma_mem { void *va; dma_addr_t pa; u32 size; -} __packed; +}; #define i40e_allocate_dma_mem(h, m, unused, s, a) \ i40evf_allocate_dma_mem_d(h, m, s, a) @@ -63,7 +63,7 @@ struct i40e_dma_mem { struct i40e_virt_mem { void *va; u32 size; -} __packed; +}; #define i40e_allocate_virt_mem(h, m, s) i40evf_allocate_virt_mem_d(h, m, s) #define i40e_free_virt_mem(h, m) i40evf_free_virt_mem_d(h, m) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 12b02e530503..d91676ccf125 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -275,7 +275,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); -#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this @@ -1299,7 +1299,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; - while (likely(total_rx_packets < budget)) { + while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; unsigned int size; @@ -1406,7 +1406,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_bytes += total_rx_bytes; /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : total_rx_packets; + return failure ? budget : (int)total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 6cc92089fecb..7901cc85cbe5 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -39,6 +39,17 @@ #include <linux/tcp.h> #include <linux/sctp.h> #include <linux/ipv6.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/delay.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/socket.h> +#include <linux/jiffies.h> #include <net/ip6_checksum.h> #include <net/udp.h> diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 9bb2cc7dd4e4..76fd89c1dbb2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -165,7 +165,7 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct i40evf_adapter *adapter = netdev_priv(netdev); - int i, j; + unsigned int i, j; char *p; for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) { @@ -197,7 +197,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data) int i; if (sset == ETH_SS_STATS) { - for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) { + for (i = 0; i < (int)I40EVF_GLOBAL_STATS_LEN; i++) { memcpy(p, i40evf_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 7c213a347909..93536b9fc629 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1957,8 +1957,8 @@ static void i40evf_adminq_task(struct work_struct *work) container_of(work, struct i40evf_adapter, adminq_task); struct i40e_hw *hw = &adapter->hw; struct i40e_arq_event_info event; - struct virtchnl_msg *v_msg; - i40e_status ret; + enum virtchnl_ops v_op; + i40e_status ret, v_ret; u32 val, oldval; u16 pending; @@ -1970,15 +1970,15 @@ static void i40evf_adminq_task(struct work_struct *work) if (!event.msg_buf) goto out; - v_msg = (struct virtchnl_msg *)&event.desc; do { ret = i40evf_clean_arq_element(hw, &event, &pending); - if (ret || !v_msg->v_opcode) + v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); + v_ret = (i40e_status)le32_to_cpu(event.desc.cookie_low); + + if (ret || !v_op) break; /* No event to process or error cleaning ARQ */ - i40evf_virtchnl_completion(adapter, v_msg->v_opcode, - (i40e_status)v_msg->v_retval, - event.msg_buf, + i40evf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf, event.msg_len); if (pending != 0) memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 4e35e7017f3d..2c19070d2a0b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -79,16 +79,28 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) switch (hw->phy.media_type) { case ixgbe_media_type_fiber: - hw->mac.ops.check_link(hw, &speed, &link_up, false); - /* if link is down, assume supported */ - if (link_up) - supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? + /* flow control autoneg black list */ + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_SFP: + case IXGBE_DEV_ID_X550EM_A_SFP_N: + supported = false; + break; + default: + hw->mac.ops.check_link(hw, &speed, &link_up, false); + /* if link is down, assume supported */ + if (link_up) + supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? true : false; - else - supported = true; + else + supported = true; + } + break; case ixgbe_media_type_backplane: - supported = true; + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_XFI) + supported = false; + else + supported = true; break; case ixgbe_media_type_copper: /* only some copper devices support flow control autoneg */ @@ -111,6 +123,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) break; } + if (!supported) + hw_dbg(hw, "Device %x does not support flow control autoneg\n", + hw->device_id); + return supported; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index b45fdc98033d..f1bfae0c41d0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1018,8 +1018,12 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx]; struct ixgbe_ring *ring; - ixgbe_for_each_ring(ring, q_vector->tx) - adapter->tx_ring[ring->queue_index] = NULL; + ixgbe_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + adapter->xdp_ring[ring->queue_index] = NULL; + else + adapter->tx_ring[ring->queue_index] = NULL; + } ixgbe_for_each_ring(ring, q_vector->rx) adapter->rx_ring[ring->queue_index] = NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f1dbdf26d8e1..96606e3eb965 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -386,7 +386,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) if (ixgbe_removed(reg_addr)) return IXGBE_FAILED_READ_REG; if (unlikely(hw->phy.nw_mng_if_sel & - IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M)) { + IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) { struct ixgbe_adapter *adapter; int i; @@ -2214,7 +2214,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, struct xdp_buff *xdp) { - int result = IXGBE_XDP_PASS; + int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; u32 act; @@ -2231,6 +2231,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_TX: result = ixgbe_xmit_xdp_ring(adapter, xdp); break; + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (!err) + result = IXGBE_XDP_TX; + else + result = IXGBE_XDP_CONSUMED; + break; default: bpf_warn_invalid_xdp_action(act); /* fallthrough */ @@ -2408,6 +2415,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ wmb(); writel(ring->next_to_use, ring->tail); + + xdp_do_flush_map(); } u64_stats_update_begin(&rx_ring->syncp); @@ -5810,6 +5819,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter) usleep_range(10000, 20000); + /* synchronize_sched() needed for pending XDP buffers to drain */ + if (adapter->xdp_ring[0]) + synchronize_sched(); netif_tx_stop_all_queues(netdev); /* call carrier off first to avoid false dev_watchdog timeouts */ @@ -9823,6 +9835,53 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp) } } +static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ring *ring; + int err; + + if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) + return -EINVAL; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL; + if (unlikely(!ring)) + return -EINVAL; + + err = ixgbe_xmit_xdp_ring(adapter, xdp); + if (err != IXGBE_XDP_TX) + return -ENOMEM; + + return 0; +} + +static void ixgbe_xdp_flush(struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ring *ring; + + /* Its possible the device went down between xdp xmit and flush so + * we need to ensure device is still up. + */ + if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) + return; + + ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL; + if (unlikely(!ring)) + return; + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + */ + wmb(); + writel(ring->next_to_use, ring->tail); + + return; +} + static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, @@ -9869,6 +9928,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, .ndo_xdp = ixgbe_xdp, + .ndo_xdp_xmit = ixgbe_xdp_xmit, + .ndo_xdp_flush = ixgbe_xdp_flush, }; /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 0760bd7eeb01..112d24c6c9ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -679,8 +679,9 @@ update_vlvfb: static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, int vf, int index, unsigned char *mac_addr) { - struct list_head *pos; struct vf_macvlans *entry; + struct list_head *pos; + int retval = 0; if (index <= 1) { list_for_each(pos, &adapter->vf_mvs.l) { @@ -721,13 +722,15 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, if (!entry || !entry->free) return -ENOSPC; + retval = ixgbe_add_mac_filter(adapter, mac_addr, vf); + if (retval < 0) + return retval; + entry->free = false; entry->is_macvlan = true; entry->vf = vf; memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN); - ixgbe_add_mac_filter(adapter, mac_addr, vf); - return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 9c2460c5ef1b..ffa0ee5cd0f5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3778,8 +3778,8 @@ struct ixgbe_info { #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G BIT(19) #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G BIT(20) #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G BIT(21) -#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M BIT(23) -#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE BIT(24) +#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE BIT(25) +#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE BIT(24) /* X552 only */ #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \ (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 72d84a065e34..19fbb2f28ea4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1555,9 +1555,14 @@ static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw) **/ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) { + struct ixgbe_mac_info *mac = &hw->mac; s32 status; u32 reg_val; + /* iXFI is only supported with X552 */ + if (mac->type != ixgbe_mac_X550EM_x) + return IXGBE_ERR_LINK_SETUP; + /* Disable AN and force speed to 10G Serial. */ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), @@ -1874,8 +1879,10 @@ static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw, else force_speed = IXGBE_LINK_SPEED_1GB_FULL; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* If X552 and internal link mode is XFI, then setup XFI internal link. + */ + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { status = ixgbe_setup_ixfi_x550em(hw, &force_speed); if (status) @@ -2404,17 +2411,30 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); /* Enable link status change alarm */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - MDIO_MMD_AN, ®); - if (status) - return status; - reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; + /* Enable the LASI interrupts on X552 devices to receive notifications + * of the link configurations of the external PHY and correspondingly + * support the configuration of the internal iXFI link, since iXFI does + * not support auto-negotiation. This is not required for X553 devices + * having KR support, which performs auto-negotiations and which is used + * as the internal link to the external PHY. Hence adding a check here + * to avoid enabling LASI interrupts for X553 devices. + */ + if (hw->mac.type != ixgbe_mac_x550em_a) { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + MDIO_MMD_AN, ®); + if (status) + return status; - status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - MDIO_MMD_AN, reg); - if (status) - return status; + reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; + + status = hw->phy.ops.write_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + MDIO_MMD_AN, reg); + if (status) + return status; + } /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, @@ -2615,7 +2635,8 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; - if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { + if (!(hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) { speed = IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL; return ixgbe_setup_kr_speed_x550em(hw, speed); @@ -2822,7 +2843,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) { bool pause, asm_dir; u32 reg_val; - s32 rc; + s32 rc = 0; /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { @@ -2865,32 +2886,37 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) return IXGBE_ERR_CONFIG; } - if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR && - hw->device_id != IXGBE_DEV_ID_X550EM_A_KR && - hw->device_id != IXGBE_DEV_ID_X550EM_A_KR_L) - return 0; - - rc = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - ®_val); - if (rc) - return rc; - - reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | - IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); - if (pause) - reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; - if (asm_dir) - reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; - rc = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - reg_val); - - /* This device does not fully support AN. */ - hw->fc.disable_fc_autoneg = true; + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_X_KR: + case IXGBE_DEV_ID_X550EM_A_KR: + case IXGBE_DEV_ID_X550EM_A_KR_L: + rc = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, + ®_val); + if (rc) + return rc; + reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); + if (pause) + reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; + if (asm_dir) + reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + rc = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, + reg_val); + + /* This device does not fully support AN. */ + hw->fc.disable_fc_autoneg = true; + break; + case IXGBE_DEV_ID_X550EM_X_XFI: + hw->fc.disable_fc_autoneg = true; + break; + default: + break; + } return rc; } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e588a0cdb074..f1886e1bdd82 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1032,7 +1032,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) unsigned int done[MTK_MAX_DEVS]; unsigned int bytes[MTK_MAX_DEVS]; u32 cpu, dma; - static int condition; int total = 0, i; memset(done, 0, sizeof(done)); @@ -1056,10 +1055,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) mac = 1; skb = tx_buf->skb; - if (!skb) { - condition = 1; + if (!skb) break; - } if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) { bytes[mac] += skb->len; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 85fe17e4dcfb..87d1f4d2a77b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -208,12 +208,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) cq->moder_cnt, cq->moder_time); } -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, &priv->mdev->uar_lock); - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 2b0cbca4beb5..686e18de9a97 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev, mutex_unlock(&priv->mdev->state_lock); } -static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) +static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; @@ -176,8 +176,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; } - - return 0; } static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) @@ -309,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) } /* Build device profile according to supplied module parameters */ - if (mlx4_en_get_profile(mdev)) { - mlx4_err(mdev, "Bad module parameters, aborting\n"); - goto err_mr; - } + mlx4_en_get_profile(mdev); /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d350b2158104..fdb3ad0cbe54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -685,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 9807ef814e42..f6963b0b4a55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -57,6 +57,9 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1bd34d9a7b9e..7e8ba546c3a4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3679,15 +3679,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, }; /* reg_htgt_trap_group @@ -3952,10 +3954,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); */ MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); -static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) { MLXSW_REG_ZERO(rgcr, payload); mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); } /* RITR - Router Interface Table Register @@ -3988,16 +3992,16 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); enum mlxsw_reg_ritr_if_type { + /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, + /* FID interface. */ MLXSW_REG_RITR_FID_IF, + /* Sub-port interface. */ MLXSW_REG_RITR_SP_IF, }; /* reg_ritr_type - * Router interface type. - * 0 - VLAN interface. - * 1 - FID interface. - * 2 - Sub-port interface. + * Router interface type as per enum mlxsw_reg_ritr_if_type. * Access: RW */ MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); @@ -4203,10 +4207,12 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, MLXSW_REG_ZERO(ritr, payload); mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); @@ -4712,12 +4718,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); /* reg_ralue_dip* * The prefix of the route or of the marker that the object of the LPM * is compared with. The most significant bits of the dip are the prefix. - * The list significant bits must be '0' if the prefix_len is smaller + * The least significant bits must be '0' if the prefix_len is smaller * than 128 for IPv6 or smaller than 32 for IPv4. * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. * Access: Index */ MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); enum mlxsw_reg_ralue_entry_type { MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, @@ -4806,7 +4813,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); */ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); -/* reg_ralue_v +/* reg_ralue_ip2me_v * Valid bit for the tunnel_ptr field. * If valid = 0 then trap to CPU as IP2ME trap ID. * If valid = 1 and the packet format allows NVE or IPinIP tunnel @@ -4816,15 +4823,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); +MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1); -/* reg_ralue_tunnel_ptr +/* reg_ralue_ip2me_tunnel_ptr * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. * For Spectrum, pointer to KVD Linear. * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); +MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24); static inline void mlxsw_reg_ralue_pack(char *payload, enum mlxsw_reg_ralxx_protocol protocol, @@ -4851,6 +4858,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, mlxsw_reg_ralue_dip4_set(payload, dip); } +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + static inline void mlxsw_reg_ralue_act_remote_pack(char *payload, enum mlxsw_reg_ralue_trap_action trap_action, @@ -4954,6 +4971,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); * Access: Index */ MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); enum mlxsw_reg_rauht_trap_action { MLXSW_REG_RAUHT_TRAP_ACTION_NOP, @@ -5018,6 +5036,15 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, mlxsw_reg_rauht_dip4_set(payload, dip); } +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + /* RALEU - Router Algorithmic LPM ECMP Update Register * --------------------------------------------------- * The register enables updating the ECMP section in the action for multiple @@ -5216,6 +5243,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, int ent_index, u16 *p_rif, u32 *p_dip) @@ -5224,6 +5275,14 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); } +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 60bf8f27cc00..88b668ba0d8a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -58,6 +58,7 @@ #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include <net/tc_act/tc_sample.h> +#include <net/addrconf.h> #include "spectrum.h" #include "pci.h" @@ -3333,15 +3334,47 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), /* L3 traps */ - MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false), - MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), - MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), - MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), - MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), + MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -3376,15 +3409,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: rate = 16 * 1024; burst_size = 10; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: rate = 1024; burst_size = 7; break; @@ -3433,21 +3468,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) priority = 5; tc = 5; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: priority = 4; tc = 4; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: priority = 3; tc = 3; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: priority = 1; @@ -4357,6 +4394,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, +}; + static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; @@ -4377,6 +4418,7 @@ static int __init mlxsw_sp_module_init(void) register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); @@ -4393,6 +4435,7 @@ err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4403,6 +4446,7 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5ef98d4d0ab6..e848f06e34e6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -384,6 +384,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); void diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 01a1501b56ca..508b5fcacd77 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -369,7 +369,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port = mlxsw_sp_port->local_port; in_port = false; } else { - /* If out_dev is NULL, the called wants to + /* If out_dev is NULL, the caller wants to * set forward to ingress port. */ local_port = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 85d5001a5818..fb8031828454 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 61a10f166f97..bc5173f1b5c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -984,6 +984,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 6afbe9ec64e2..bbd238e50f05 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, - [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, @@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int *mlxsw_sp_packet_type_sfgc_types[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 21bb2bf62d3e..400ad4081660 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -212,11 +212,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u16 n_proto) +{ + struct flow_dissector_key_ip *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + key->ttl, mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + key->tos & 0x3, mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + key->tos >> 6, mask->tos >> 6); + + return 0; +} + static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + u16 n_proto_mask = 0; + u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; @@ -229,6 +264,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; @@ -253,8 +289,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_BASIC, f->mask); - u16 n_proto_key = ntohs(key->n_proto); - u16 n_proto_mask = ntohs(mask->n_proto); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -324,6 +360,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; + return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4b2e0fd7d51e..add03fa34a2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -43,12 +43,16 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/socket.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> #include <net/fib_rules.h> #include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> #include "spectrum.h" #include "core.h" @@ -304,7 +308,7 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) struct mlxsw_sp_prefix_usage { DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); @@ -384,23 +388,23 @@ struct mlxsw_sp_fib_node { struct mlxsw_sp_fib_key key; }; -struct mlxsw_sp_fib_entry_params { - u32 tb_id; - u32 prio; - u8 tos; - u8 type; -}; - struct mlxsw_sp_fib_entry { struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; - struct mlxsw_sp_fib_entry_params params; bool offloaded; }; +struct mlxsw_sp_fib4_entry { + struct mlxsw_sp_fib_entry common; + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + enum mlxsw_sp_l3proto { MLXSW_SP_L3_PROTO_IPV4, MLXSW_SP_L3_PROTO_IPV6, @@ -428,6 +432,7 @@ struct mlxsw_sp_vr { u32 tb_id; /* kernel fib table id */ unsigned int rif_count; struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -625,7 +630,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4; + return !!vr->fib4 || !!vr->fib6; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -694,7 +699,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, case MLXSW_SP_L3_PROTO_IPV4: return vr->fib4; case MLXSW_SP_L3_PROTO_IPV6: - BUG_ON(1); + return vr->fib6; } return NULL; } @@ -703,6 +708,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; + int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) @@ -710,12 +716,24 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); + vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(vr->fib6)) { + err = PTR_ERR(vr->fib6); + goto err_fib6_create; + } vr->tb_id = tb_id; return vr; + +err_fib6_create: + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; + return ERR_PTR(err); } static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; } @@ -773,7 +791,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list)) mlxsw_sp_vr_destroy(vr); } @@ -929,8 +948,15 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) static void mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long interval; +#if IS_ENABLED(CONFIG_IPV6) + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); +#else + interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); +#endif mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); } @@ -965,6 +991,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } +#if IS_ENABLED(IPV6) +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} +#else +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ +} +#endif + static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) @@ -988,6 +1052,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, } +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. */ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) { @@ -997,7 +1070,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, rec_index); break; case MLXSW_REG_RAUHTD_TYPE_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); break; } } @@ -1022,22 +1096,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) return false; } -static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) { - char *rauhtd_pl; - u8 num_rec; - int i, err; - - rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); - if (!rauhtd_pl) - return -ENOMEM; + int i, num_rec; + int err; /* Make sure the neighbour's netdev isn't removed in the * process. */ rtnl_lock(); do { - mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { @@ -1051,6 +1123,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: kfree(rauhtd_pl); return err; } @@ -1147,6 +1240,32 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n) +{ + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + +static void mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding) @@ -1154,11 +1273,17 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; - if (neigh_entry->key.n->tbl == &arp_tbl) + if (neigh_entry->key.n->tbl->family == AF_INET) { mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); - else + } else if (neigh_entry->key.n->tbl->family == AF_INET6) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n)) + return; + mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + } else { WARN_ON_ONCE(1); + } } struct mlxsw_sp_neigh_event_work { @@ -1227,7 +1352,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, p = ptr; /* We don't care about changes in the default table. */ - if (!p->dev || p->tbl != &arp_tbl) + if (!p->dev || (p->tbl->family != AF_INET && + p->tbl->family != AF_INET6)) return NOTIFY_DONE; /* We are in atomic context and can't take RTNL mutex, @@ -1246,7 +1372,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, case NETEVENT_NEIGH_UPDATE: n = ptr; - if (n->tbl != &arp_tbl) + if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); @@ -1307,25 +1433,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); -} - static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } struct mlxsw_sp_nexthop_key { @@ -1340,6 +1457,7 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -1360,6 +1478,7 @@ struct mlxsw_sp_nexthop_group_key { struct mlxsw_sp_nexthop_group { struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ + struct neigh_table *neigh_tbl; struct mlxsw_sp_nexthop_group_key key; u8 adj_index_valid:1, gateway:1; /* routes using the group use a gateway */ @@ -1556,7 +1675,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - if (nh->should_offload ^ nh->offloaded) { + if (nh->should_offload != nh->offloaded) { offload_change = true; if (nh->should_offload) nh->update = 1; @@ -1640,9 +1759,9 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else if (nh->offloaded) nh->should_offload = 0; nh->update = 1; } @@ -1684,7 +1803,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; int err; @@ -1693,13 +1811,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, return 0; /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. + * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -1761,10 +1880,10 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) { struct net_device *dev = fib_nh->nh_dev; struct in_device *in_dev; @@ -1773,6 +1892,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; + memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -1802,16 +1922,16 @@ err_nexthop_neigh_init: return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } -static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, - unsigned long event, struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; @@ -1856,7 +1976,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; @@ -1871,6 +1991,8 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) if (!nh_grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->neigh_tbl = &arp_tbl; + nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; nh_grp->count = fi->fib_nhs; nh_grp->key.fi = fi; @@ -1878,9 +2000,9 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; - err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) - goto err_nexthop_init; + goto err_nexthop4_init; } err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) @@ -1889,10 +2011,10 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) return nh_grp; err_nexthop_group_insert: -err_nexthop_init: +err_nexthop4_init: for (i--; i >= 0; i--) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } fib_info_put(nh_grp->key.fi); kfree(nh_grp); @@ -1900,8 +2022,8 @@ err_nexthop_init: } static void -mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop *nh; int i; @@ -1909,7 +2031,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); WARN_ON_ONCE(nh_grp->adj_index_valid); @@ -1917,9 +2039,9 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, kfree(nh_grp); } -static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - struct fib_info *fi) +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) { struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; @@ -1927,7 +2049,7 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, key.fi = fi; nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); if (!nh_grp) { - nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } @@ -1936,15 +2058,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; - mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->tos; } static bool @@ -1952,8 +2084,14 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - if (fib_entry->params.tos) - return false; + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: @@ -2016,13 +2154,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, } } -static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -2041,24 +2203,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; u16 trap_id = 0; u16 rif_index = 0; @@ -2070,42 +2227,34 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -2114,16 +2263,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - int err = -EINVAL; + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - switch (fib_entry->fib_node->fib->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); - break; - case MLXSW_SP_L3_PROTO_IPV6: - return err; - } mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } @@ -2173,72 +2316,80 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, const struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_entry *fib_entry; int err; - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) { - err = -ENOMEM; - goto err_fib_entry_alloc; - } + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) - goto err_nexthop_group_get; + goto err_nexthop4_group_get; - fib_entry->params.prio = fen_info->fi->fib_priority; - fib_entry->params.tb_id = fen_info->tb_id; - fib_entry->params.type = fen_info->type; - fib_entry->params.tos = fen_info->tos; + fib4_entry->prio = fen_info->fi->fib_priority; + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->tos = fen_info->tos; fib_entry->fib_node = fib_node; - return fib_entry; + return fib4_entry; -err_nexthop_group_get: +err_nexthop4_group_get: err_fib4_entry_type_set: - kfree(fib_entry); -err_fib_entry_alloc: + kfree(fib4_entry); return ERR_PTR(err); } static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); - kfree(fib_entry); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info); +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_node)) + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id == fen_info->tb_id && - fib_entry->params.tos == fen_info->tos && - fib_entry->params.type == fen_info->type && - fib_entry->nh_group->key.fi == fen_info->fi) { - return fib_entry; + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + fib4_entry->common.nh_group->key.fi == fen_info->fi) { + return fib4_entry; } } @@ -2395,28 +2546,25 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); - fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + fib = mlxsw_sp_vr_fib(vr, proto); - fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); if (!fib_node) { err = -ENOMEM; goto err_fib_node_create; @@ -2435,8 +2583,8 @@ err_fib_node_create: return ERR_PTR(err); } -static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { struct mlxsw_sp_vr *vr = fib_node->fib->vr; @@ -2447,95 +2595,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(vr); } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry_params *params) + const struct mlxsw_sp_fib4_entry *new4_entry) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id > params->tb_id) + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id > new4_entry->tb_id) continue; - if (fib_entry->params.tb_id != params->tb_id) + if (fib4_entry->tb_id != new4_entry->tb_id) break; - if (fib_entry->params.tos > params->tos) + if (fib4_entry->tos > new4_entry->tos) continue; - if (fib_entry->params.prio >= params->prio || - fib_entry->params.tos < params->tos) - return fib_entry; + if (fib4_entry->prio >= new4_entry->prio || + fib4_entry->tos < new4_entry->tos) + return fib4_entry; } return NULL; } -static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, - struct mlxsw_sp_fib_entry *new_entry) +static int +mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, + struct mlxsw_sp_fib4_entry *new4_entry) { struct mlxsw_sp_fib_node *fib_node; - if (WARN_ON(!fib_entry)) + if (WARN_ON(!fib4_entry)) return -EINVAL; - fib_node = fib_entry->fib_node; - list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id != new_entry->params.tb_id || - fib_entry->params.tos != new_entry->params.tos || - fib_entry->params.prio != new_entry->params.prio) + fib_node = fib4_entry->common.fib_node; + list_for_each_entry_from(fib4_entry, &fib_node->entry_list, + common.list) { + if (fib4_entry->tb_id != new4_entry->tb_id || + fib4_entry->tos != new4_entry->tos || + fib4_entry->prio != new4_entry->prio) break; } - list_add_tail(&new_entry->list, &fib_entry->list); + list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); return 0; } static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *new_entry, +mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_entry; - fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); if (append) - return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); - if (replace && WARN_ON(!fib_entry)) + return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); + if (replace && WARN_ON(!fib4_entry)) return -EINVAL; /* Insert new entry before replaced one, so that we can later * remove the second. */ - if (fib_entry) { - list_add_tail(&new_entry->list, &fib_entry->list); + if (fib4_entry) { + list_add_tail(&new4_entry->common.list, + &fib4_entry->common.list); } else { - struct mlxsw_sp_fib_entry *last; + struct mlxsw_sp_fib4_entry *last; - list_for_each_entry(last, &fib_node->entry_list, list) { - if (new_entry->params.tb_id > last->params.tb_id) + list_for_each_entry(last, &fib_node->entry_list, common.list) { + if (new4_entry->tb_id > last->tb_id) break; - fib_entry = last; + fib4_entry = last; } - if (fib_entry) - list_add(&new_entry->list, &fib_entry->list); + if (fib4_entry) + list_add(&new4_entry->common.list, + &fib4_entry->common.list); else - list_add(&new_entry->list, &fib_node->entry_list); + list_add(&new4_entry->common.list, + &fib_node->entry_list); } return 0; } static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) { - list_del(&fib_entry->list); + list_del(&fib4_entry->common.list); } -static int -mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return 0; @@ -2552,11 +2705,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static void -mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return; @@ -2574,54 +2727,50 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, - append); + err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); if (err) return err; - err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) - goto err_fib4_node_entry_add; + goto err_fib_node_entry_add; return 0; -err_fib4_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib_entry); +err_fib_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib4_entry); return err; } static void mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); - mlxsw_sp_fib4_node_list_remove(fib_entry); + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_node_list_remove(fib4_entry); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *replaced; if (!replace) return; /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib_entry, list); + replaced = list_next_entry(fib4_entry, common.list); mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static int @@ -2629,76 +2778,80 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; int err; if (mlxsw_sp->router->aborted) return 0; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(fib_node)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); return PTR_ERR(fib_node); } - fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); - if (IS_ERR(fib_entry)) { + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); - err = PTR_ERR(fib_entry); + err = PTR_ERR(fib4_entry); goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, append); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); return 0; err_fib4_node_entry_link: - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; if (mlxsw_sp->router->aborted) return; - fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib_entry)) + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib4_entry)) return; - fib_node = fib_entry->fib_node; + fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_ralxx_protocol proto, + u8 tree_id) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); if (err) return err; @@ -2711,17 +2864,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, - 0); + mlxsw_reg_ralue_pack(ralue_pl, proto, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -2732,17 +2882,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + int err; + + err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN); + if (err) + return err; + + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN + 1); +} + static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib_entry *fib_entry, *tmp; + struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { - bool do_break = &tmp->list == &fib_node->entry_list; + list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); /* Break when entry list is empty and node was freed. * Otherwise, we'll access freed memory in the next * iteration. @@ -2791,10 +2957,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2839,7 +3012,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, replace, append); if (err) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: @@ -2850,13 +3023,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -2948,17 +3121,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, - const struct in_device *in_dev, - unsigned long event) +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) { + struct inet6_dev *inet6_dev; + bool addr_list_empty = true; + struct in_device *idev; + switch (event) { case NETDEV_UP: - if (!rif) - return true; - return false; + return rif == NULL; case NETDEV_DOWN: - if (rif && !in_dev->ifa_list && + idev = __in_dev_get_rtnl(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + + if (rif && addr_list_empty && !netif_is_l3_slave(rif->dev)) return true; /* It is possible we already removed the RIF ourselves @@ -3356,7 +3540,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(dev, event); @@ -3364,6 +3548,61 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(dev, event); +out: + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -3565,6 +3804,11 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) @@ -3573,6 +3817,9 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) return 0; err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); return err; } @@ -3584,6 +3831,8 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); } @@ -3614,6 +3863,11 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) @@ -3622,6 +3876,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) return 0; err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } @@ -3633,6 +3890,8 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } @@ -3704,7 +3963,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) @@ -3716,7 +3975,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; - mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12b5ed58f3eb..61652396bf75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -61,11 +61,32 @@ enum { MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, - MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, - MLXSW_TRAP_ID_BGP_IPV4 = 0x88, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, MLXSW_TRAP_ID_MAX = 0x1FF diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index c0d7d5eec7e7..2e4effa9fe45 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -161,7 +161,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev) priv->rx_head = 0; - /* reset the MAC controller TX/RX desciptor base address */ + /* reset the MAC controller TX/RX descriptor base address */ writel(priv->tx_base, priv->base + REG_TXR_BASE_ADDRESS); writel(priv->rx_base, priv->base + REG_RXR_BASE_ADDRESS); } @@ -269,9 +269,8 @@ rx_next: priv->rx_head = rx_head; } - if (rx < budget) { + if (rx < budget) napi_complete_done(napi, rx); - } priv->reg_imr |= RPKT_FINISH_M; writel(priv->reg_imr, priv->base + REG_INTERRUPT_MASK); @@ -289,8 +288,8 @@ static int moxart_tx_queue_space(struct net_device *ndev) static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); - unsigned tx_head = priv->tx_head; - unsigned tx_tail = priv->tx_tail; + unsigned int tx_head = priv->tx_head; + unsigned int tx_tail = priv->tx_tail; while (tx_tail != tx_head) { dma_unmap_single(&ndev->dev, priv->tx_mapping[tx_tail], @@ -312,7 +311,7 @@ static void moxart_tx_finished(struct net_device *ndev) static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) { - struct net_device *ndev = (struct net_device *) dev_id; + struct net_device *ndev = (struct net_device *)dev_id; struct moxart_mac_priv_t *priv = netdev_priv(ndev); unsigned int ists = readl(priv->base + REG_INTERRUPT_STATUS); @@ -495,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); - if (priv->tx_desc_base == NULL) { + if (!priv->tx_desc_base) { ret = -ENOMEM; goto init_fail; } @@ -503,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); - if (priv->rx_desc_base == NULL) { + if (!priv->rx_desc_base) { ret = -ENOMEM; goto init_fail; } diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 686b8957d5cf..bee608b547d1 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -55,17 +55,17 @@ #define RX_DESC2_ADDRESS_VIRT 4 #define TX_DESC_NUM 64 -#define TX_DESC_NUM_MASK (TX_DESC_NUM-1) +#define TX_DESC_NUM_MASK (TX_DESC_NUM - 1) #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 -#define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK + 1) #define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 -#define RX_DESC_NUM_MASK (RX_DESC_NUM-1) +#define RX_DESC_NUM_MASK (RX_DESC_NUM - 1) #define RX_NEXT(N) (((N) + 1) & (RX_DESC_NUM_MASK)) #define RX_BUF_SIZE 1600 -#define RX_BUF_SIZE_MAX (RX_DESC1_BUF_SIZE_MASK+1) +#define RX_BUF_SIZE_MAX (RX_DESC1_BUF_SIZE_MASK + 1) #define REG_INTERRUPT_STATUS 0 #define REG_INTERRUPT_MASK 4 diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index fd2ec36c6fa1..462eda926b1c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -42,8 +42,6 @@ * aggregated as a single large packet * napi: This parameter used to enable/disable NAPI (polling Rx) * Possible values '1' for enable and '0' for disable. Default is '1' - * ufo: This parameter used to enable/disable UDP Fragmentation Offload(UFO) - * Possible values '1' for enable and '0' for disable. Default is '0' * vlan_tag_strip: This can be used to enable or disable vlan stripping. * Possible values '1' for enable , '0' for disable. * Default is '2' - which means disable in promisc mode @@ -453,7 +451,6 @@ S2IO_PARM_INT(lro_max_pkts, 0xFFFF); S2IO_PARM_INT(indicate_max_pkts, 0); S2IO_PARM_INT(napi, 1); -S2IO_PARM_INT(ufo, 0); S2IO_PARM_INT(vlan_tag_strip, NO_STRIP_IN_PROMISC); static unsigned int tx_fifo_len[MAX_TX_FIFOS] = @@ -4128,32 +4125,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) } frg_len = skb_headlen(skb); - if (offload_type == SKB_GSO_UDP) { - int ufo_size; - - ufo_size = s2io_udp_mss(skb); - ufo_size &= ~7; - txdp->Control_1 |= TXD_UFO_EN; - txdp->Control_1 |= TXD_UFO_MSS(ufo_size); - txdp->Control_1 |= TXD_BUFFER0_SIZE(8); -#ifdef __BIG_ENDIAN - /* both variants do cpu_to_be64(be32_to_cpu(...)) */ - fifo->ufo_in_band_v[put_off] = - (__force u64)skb_shinfo(skb)->ip6_frag_id; -#else - fifo->ufo_in_band_v[put_off] = - (__force u64)skb_shinfo(skb)->ip6_frag_id << 32; -#endif - txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v; - txdp->Buffer_Pointer = pci_map_single(sp->pdev, - fifo->ufo_in_band_v, - sizeof(u64), - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) - goto pci_map_failed; - txdp++; - } - txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) @@ -4161,8 +4132,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Host_Control = (unsigned long)skb; txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); - if (offload_type == SKB_GSO_UDP) - txdp->Control_1 |= TXD_UFO_EN; frg_cnt = skb_shinfo(skb)->nr_frags; /* For fragmented SKB. */ @@ -4177,14 +4146,9 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) skb_frag_size(frag), DMA_TO_DEVICE); txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag)); - if (offload_type == SKB_GSO_UDP) - txdp->Control_1 |= TXD_UFO_EN; } txdp->Control_1 |= TXD_GATHER_CODE_LAST; - if (offload_type == SKB_GSO_UDP) - frg_cnt++; /* as Txd0 was used for inband header */ - tx_fifo = mac_control->tx_FIFO_start[queue]; val64 = fifo->list_info[put_off].list_phy_addr; writeq(val64, &tx_fifo->TxDL_Pointer); @@ -7910,11 +7874,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) NETIF_F_RXCSUM | NETIF_F_LRO; dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - if (sp->device_type & XFRAME_II_DEVICE) { - dev->hw_features |= NETIF_F_UFO; - if (ufo) - dev->features |= NETIF_F_UFO; - } if (sp->high_dma_flag == true) dev->features |= NETIF_F_HIGHDMA; dev->watchdog_timeo = WATCH_DOG_TIMEOUT; @@ -8147,10 +8106,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n", dev->name); - if (ufo) - DBG_PRINT(ERR_DBG, - "%s: UDP Fragmentation Offload(UFO) enabled\n", - dev->name); /* Initialize device name */ snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name, sp->product_name); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index d67969d3e484..dd769eceb33d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -174,6 +174,21 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs) return nfp_pcie_sriov_enable(pdev, num_vfs); } +static const struct firmware * +nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name) +{ + const struct firmware *fw = NULL; + int err; + + err = request_firmware_direct(&fw, name, &pdev->dev); + nfp_info(pf->cpp, " %s: %s\n", + name, err ? "not found" : "found, loading..."); + if (err) + return NULL; + + return fw; +} + /** * nfp_net_fw_find() - Find the correct firmware image for netdev mode * @pdev: PCI Device structure @@ -184,13 +199,32 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs) static const struct firmware * nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) { - const struct firmware *fw = NULL; struct nfp_eth_table_port *port; + const struct firmware *fw; const char *fw_model; char fw_name[256]; - int spc, err = 0; - int i, j; + const u8 *serial; + u16 interface; + int spc, i, j; + nfp_info(pf->cpp, "Looking for firmware file in order of priority:\n"); + + /* First try to find a firmware image specific for this device */ + interface = nfp_cpp_interface(pf->cpp); + nfp_cpp_serial(pf->cpp, &serial); + sprintf(fw_name, "netronome/serial-%pMF-%02hhx-%02hhx.nffw", + serial, interface >> 8, interface & 0xff); + fw = nfp_net_fw_request(pdev, pf, fw_name); + if (fw) + return fw; + + /* Then try the PCI name */ + sprintf(fw_name, "netronome/pci-%s.nffw", pci_name(pdev)); + fw = nfp_net_fw_request(pdev, pf, fw_name); + if (fw) + return fw; + + /* Finally try the card type and media */ if (!pf->eth_tbl) { dev_err(&pdev->dev, "Error: can't identify media config\n"); return NULL; @@ -223,13 +257,7 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) if (spc <= 0) return NULL; - err = request_firmware(&fw, fw_name, &pdev->dev); - if (err) - return NULL; - - dev_info(&pdev->dev, "Loading FW image: %s\n", fw_name); - - return fw; + return nfp_net_fw_request(pdev, pf, fw_name); } /** diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18750ff0ede6..ea471604450e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2658,6 +2658,7 @@ static int nfp_net_netdev_close(struct net_device *netdev) /* Step 2: Tell NFP */ nfp_net_clear_config_and_disable(nn); + nfp_port_configure(netdev, false); /* Step 3: Free resources */ @@ -2775,16 +2776,21 @@ static int nfp_net_netdev_open(struct net_device *netdev) goto err_free_all; /* Step 2: Configure the NFP + * - Ifup the physical interface if it exists * - Enable rings from 0 to tx_rings/rx_rings - 1. * - Write MAC address (in case it changed) * - Set the MTU * - Set the Freelist buffer size * - Enable the FW */ - err = nfp_net_set_config_and_enable(nn); + err = nfp_port_configure(netdev, true); if (err) goto err_free_all; + err = nfp_net_set_config_and_enable(nn); + if (err) + goto err_port_disable; + /* Step 3: Enable for kernel * - put some freelist descriptors on each RX ring * - enable NAPI on each ring @@ -2795,6 +2801,8 @@ static int nfp_net_netdev_open(struct net_device *netdev) return 0; +err_port_disable: + nfp_port_configure(netdev, false); err_free_all: nfp_net_close_free_all(nn); return err; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 5797dbf2b507..d5e2361f0e86 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -704,7 +704,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (!pf->rtbl) { nfp_err(pf->cpp, "No %s, giving up.\n", pf->fw_loaded ? "symbol table" : "firmware found"); - return -EPROBE_DEFER; + return -EINVAL; } mutex_lock(&pf->lock); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 8ec5474f4b18..47daad30756c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -239,15 +239,34 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) static int nfp_repr_stop(struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); + int err; + + err = nfp_app_repr_stop(repr->app, repr); + if (err) + return err; - return nfp_app_repr_stop(repr->app, repr); + nfp_port_configure(netdev, false); + return 0; } static int nfp_repr_open(struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); + int err; + + err = nfp_port_configure(netdev, true); + if (err) + return err; + + err = nfp_app_repr_open(repr->app, repr); + if (err) + goto err_port_disable; - return nfp_app_repr_open(repr->app, repr); + return 0; + +err_port_disable: + nfp_port_configure(netdev, false); + return err; } const struct net_device_ops nfp_repr_netdev_ops = { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index e42644dbb865..d16a7b78ba9b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -181,6 +181,33 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len) return 0; } +/** + * nfp_port_configure() - helper to set the interface configured bit + * @netdev: net_device instance + * @configed: Desired state + * + * Helper to set the ifup/ifdown state on the PHY only if there is a physical + * interface associated with the netdev. + * + * Return: + * 0 - configuration successful (or no change); + * -ERRNO - configuration failed. + */ +int nfp_port_configure(struct net_device *netdev, bool configed) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return 0; + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed); + return err < 0 && err != -EOPNOTSUPP ? err : 0; +} + int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, struct nfp_port *port, unsigned int id) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index a33d22e18f94..56c76926c82a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -120,6 +120,7 @@ struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port); int nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len); +int nfp_port_configure(struct net_device *netdev, bool configed); struct nfp_port * nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index c2bc36e8649f..f6f7c085f8e0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -391,7 +391,10 @@ int nfp_eth_config_commit_end(struct nfp_nsp *nsp) * Enable or disable PHY module (this usually means setting the TX lanes * disable bits). * - * Return: 0 or -ERRNO. + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. */ int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable) { @@ -427,7 +430,10 @@ int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable) * * Set the ifup/ifdown state on the PHY. * - * Return: 0 or -ERRNO. + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. */ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) { @@ -439,6 +445,14 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) if (IS_ERR(nsp)) return PTR_ERR(nsp); + /* Older ABI versions did support this feature, however this has only + * been reliable since ABI 20. + */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 20) { + nfp_eth_config_cleanup_end(nsp); + return -EOPNOTSUPP; + } + entries = nfp_nsp_config_entries(nsp); /* Check if we are already in requested state */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 6c87bed13bd2..58a689fb04db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1684,6 +1684,8 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) "Load request was sent. Load code: 0x%x\n", load_code); + qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt); + qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt); p_hwfn->first_on_engine = (load_code == @@ -2472,6 +2474,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities; u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; + struct qed_mcp_link_capabilities *p_caps; struct qed_mcp_link_params *link; /* Read global nvm_cfg address */ @@ -2534,6 +2537,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) /* Read default link configuration */ link = &p_hwfn->mcp_info->link_input; + p_caps = &p_hwfn->mcp_info->link_capabilities; port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]); link_temp = qed_rd(p_hwfn, p_ptt, @@ -2588,10 +2592,45 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX); link->loopback_mode = 0; - DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, - "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n", - link->speed.forced_speed, link->speed.advertised_speeds, - link->speed.autoneg, link->pause.autoneg); + if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) { + link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr + + offsetof(struct nvm_cfg1_port, ext_phy)); + link_temp &= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK; + link_temp >>= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET; + p_caps->default_eee = QED_MCP_EEE_ENABLED; + link->eee.enable = true; + switch (link_temp) { + case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED: + p_caps->default_eee = QED_MCP_EEE_DISABLED; + link->eee.enable = false; + break; + case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED: + p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_BALANCED_TIME; + break; + case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE: + p_caps->eee_lpi_timer = + EEE_TX_TIMER_USEC_AGGRESSIVE_TIME; + break; + case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY: + p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_LATENCY_TIME; + break; + } + + link->eee.tx_lpi_timer = p_caps->eee_lpi_timer; + link->eee.tx_lpi_enable = link->eee.enable; + link->eee.adv_caps = QED_EEE_1G_ADV | QED_EEE_10G_ADV; + } else { + p_caps->default_eee = QED_MCP_EEE_UNSUPPORTED; + } + + DP_VERBOSE(p_hwfn, + NETIF_MSG_LINK, + "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n", + link->speed.forced_speed, + link->speed.advertised_speeds, + link->speed.autoneg, + link->pause.autoneg, + p_caps->default_eee, p_caps->eee_lpi_timer); /* Read Multi-function information from shmem */ addr = MCP_REG_SCRATCH + nvm_cfg1_offset + @@ -2751,6 +2790,27 @@ static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_hw_info_port_num_ah(p_hwfn, p_ptt); } +static void qed_get_eee_caps(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mcp_link_capabilities *p_caps; + u32 eee_status; + + p_caps = &p_hwfn->mcp_info->link_capabilities; + if (p_caps->default_eee == QED_MCP_EEE_UNSUPPORTED) + return; + + p_caps->eee_speed_caps = 0; + eee_status = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, eee_status)); + eee_status = (eee_status & EEE_SUPPORTED_SPEED_MASK) >> + EEE_SUPPORTED_SPEED_OFFSET; + + if (eee_status & EEE_1G_SUPPORTED) + p_caps->eee_speed_caps |= QED_EEE_1G_ADV; + if (eee_status & EEE_10G_ADV) + p_caps->eee_speed_caps |= QED_EEE_10G_ADV; +} + static int qed_get_hw_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -2767,6 +2827,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, qed_hw_info_port_num(p_hwfn, p_ptt); + qed_mcp_get_capabilities(p_hwfn, p_ptt); + qed_hw_get_nvm_info(p_hwfn, p_ptt); rc = qed_int_igu_read_cam(p_hwfn, p_ptt); @@ -2785,6 +2847,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, p_hwfn->mcp_info->func_info.ovlan; qed_mcp_cmd_port_init(p_hwfn, p_ptt); + + qed_get_eee_caps(p_hwfn, p_ptt); } if (qed_mcp_is_init(p_hwfn)) { @@ -3630,7 +3694,7 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } p_coal_timeset = p_eth_qzone; - memset(p_coal_timeset, 0, eth_qzone_size); + memset(p_eth_qzone, 0, eth_qzone_size); SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_TIMESET, timeset); SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_VALID, 1); qed_memcpy_to(p_hwfn, p_ptt, hw_addr, p_eth_qzone, eth_qzone_size); @@ -3638,12 +3702,46 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return 0; } -int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u16 qid, u16 sb_id) +int qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle) +{ + struct qed_queue_cid *p_cid = p_handle; + struct qed_hwfn *p_hwfn; + struct qed_ptt *p_ptt; + int rc = 0; + + p_hwfn = p_cid->p_owner; + + if (IS_VF(p_hwfn->cdev)) + return qed_vf_pf_set_coalesce(p_hwfn, rx_coal, tx_coal, p_cid); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EAGAIN; + + if (rx_coal) { + rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid); + if (rc) + goto out; + p_hwfn->cdev->rx_coalesce_usecs = rx_coal; + } + + if (tx_coal) { + rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal, p_cid); + if (rc) + goto out; + p_hwfn->cdev->tx_coalesce_usecs = tx_coal; + } +out: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} + +int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 coalesce, struct qed_queue_cid *p_cid) { struct ustorm_eth_queue_zone eth_qzone; u8 timeset, timer_res; - u16 fw_qid = 0; u32 address; int rc; @@ -3660,32 +3758,29 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } timeset = (u8)(coalesce >> timer_res); - rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid); - if (rc) - return rc; - - rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, false); + rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, + p_cid->sb_igu_id, false); if (rc) goto out; - address = BAR0_MAP_REG_USDM_RAM + USTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid); + address = BAR0_MAP_REG_USDM_RAM + + USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id); rc = qed_set_coalesce(p_hwfn, p_ptt, address, ð_qzone, sizeof(struct ustorm_eth_queue_zone), timeset); if (rc) goto out; - p_hwfn->cdev->rx_coalesce_usecs = coalesce; out: return rc; } -int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u16 qid, u16 sb_id) +int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 coalesce, struct qed_queue_cid *p_cid) { struct xstorm_eth_queue_zone eth_qzone; u8 timeset, timer_res; - u16 fw_qid = 0; u32 address; int rc; @@ -3702,22 +3797,16 @@ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } timeset = (u8)(coalesce >> timer_res); - rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid); - if (rc) - return rc; - - rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, true); + rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, + p_cid->sb_igu_id, true); if (rc) goto out; - address = BAR0_MAP_REG_XSDM_RAM + XSTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid); + address = BAR0_MAP_REG_XSDM_RAM + + XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id); rc = qed_set_coalesce(p_hwfn, p_ptt, address, ð_qzone, sizeof(struct xstorm_eth_queue_zone), timeset); - if (rc) - goto out; - - p_hwfn->cdev->tx_coalesce_usecs = coalesce; out: return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 1f1df1bf127c..defdda1ffaa2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -443,38 +443,35 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 id, bool is_vf); /** - * @brief qed_set_rxq_coalesce - Configure coalesce parameters for an Rx queue - * The fact that we can configure coalescing to up to 511, but on varying - * accuracy [the bigger the value the less accurate] up to a mistake of 3usec - * for the highest values. + * @brief qed_get_queue_coalesce - Retrieve coalesce value for a given queue. * * @param p_hwfn - * @param p_ptt - * @param coalesce - Coalesce value in micro seconds. - * @param qid - Queue index. - * @param qid - SB Id + * @param p_coal - store coalesce value read from the hardware. + * @param p_handle * * @return int - */ -int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u16 qid, u16 sb_id); + **/ +int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle); /** - * @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue - * While the API allows setting coalescing per-qid, all tx queues sharing a - * SB should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff] - * otherwise configuration would break. + * @brief qed_set_queue_coalesce - Configure coalesce parameters for Rx and + * Tx queue. The fact that we can configure coalescing to up to 511, but on + * varying accuracy [the bigger the value the less accurate] up to a mistake + * of 3usec for the highest values. + * While the API allows setting coalescing per-qid, all queues sharing a SB + * should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff] + * otherwise configuration would break. * - * @param p_hwfn - * @param p_ptt - * @param coalesce - Coalesce value in micro seconds. - * @param qid - Queue index. - * @param qid - SB Id + * + * @param rx_coal - Rx Coalesce value in micro seconds. + * @param tx_coal - TX Coalesce value in micro seconds. + * @param p_handle * * @return int - */ -int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u16 qid, u16 sb_id); + **/ +int +qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle); + const char *qed_hw_get_resc_name(enum qed_resources res_id); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 31fb0bffa098..3427fe7049b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -10825,6 +10825,17 @@ struct eth_phy_cfg { #define ETH_LOOPBACK_EXT (3) #define ETH_LOOPBACK_MAC (4) + u32 eee_cfg; +#define EEE_CFG_EEE_ENABLED BIT(0) +#define EEE_CFG_TX_LPI BIT(1) +#define EEE_CFG_ADV_SPEED_1G BIT(2) +#define EEE_CFG_ADV_SPEED_10G BIT(3) +#define EEE_TX_TIMER_USEC_MASK (0xfffffff0) +#define EEE_TX_TIMER_USEC_OFFSET 4 +#define EEE_TX_TIMER_USEC_BALANCED_TIME (0xa00) +#define EEE_TX_TIMER_USEC_AGGRESSIVE_TIME (0x100) +#define EEE_TX_TIMER_USEC_LATENCY_TIME (0x6000) + u32 feature_config_flags; #define ETH_EEE_MODE_ADV_LPI (1 << 0) }; @@ -11242,6 +11253,25 @@ struct public_port { u32 wol_pkt_len; u32 wol_pkt_details; struct dcb_dscp_map dcb_dscp_map; + + u32 eee_status; +#define EEE_ACTIVE_BIT BIT(0) +#define EEE_LD_ADV_STATUS_MASK 0x000000f0 +#define EEE_LD_ADV_STATUS_OFFSET 4 +#define EEE_1G_ADV BIT(1) +#define EEE_10G_ADV BIT(2) +#define EEE_LP_ADV_STATUS_MASK 0x00000f00 +#define EEE_LP_ADV_STATUS_OFFSET 8 +#define EEE_SUPPORTED_SPEED_MASK 0x0000f000 +#define EEE_SUPPORTED_SPEED_OFFSET 12 +#define EEE_1G_SUPPORTED BIT(1) +#define EEE_10G_SUPPORTED BIT(2) + + u32 eee_remote; +#define EEE_REMOTE_TW_TX_MASK 0x0000ffff +#define EEE_REMOTE_TW_TX_OFFSET 0 +#define EEE_REMOTE_TW_RX_MASK 0xffff0000 +#define EEE_REMOTE_TW_RX_OFFSET 16 }; struct public_func { @@ -11570,6 +11600,9 @@ struct public_drv_mb { #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000 #define DRV_MSG_CODE_OS_WOL 0x002e0000 +#define DRV_MSG_CODE_FEATURE_SUPPORT 0x00300000 +#define DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT 0x00310000 + #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; @@ -11653,6 +11686,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT 8 #define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK 0x0000FF00 +#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_MASK 0x0000FFFF +#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_OFFSET 0 +#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002 + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_UNSUPPORTED 0x00000000 @@ -11696,6 +11733,9 @@ struct public_drv_mb { #define FW_MB_PARAM_GET_PF_RDMA_IWARP 0x2 #define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3 +/* get MFW feature support response */ +#define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002 + #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0) u32 drv_pulse_mb; @@ -11891,7 +11931,16 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX 0x4 u32 phy_cfg; u32 mgmt_traffic; + u32 ext_phy; + /* EEE power saving mode */ +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK 0x00FF0000 +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET 16 +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED 0x0 +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED 0x1 +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE 0x2 +#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY 0x3 + u32 mba_cfg1; u32 mba_cfg2; u32 vf_cfg; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 0ba5ec8a9814..9a1645852015 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2047,6 +2047,106 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return qed_spq_post(p_hwfn, p_ent, NULL); } +int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_queue_cid *p_cid, u16 *p_rx_coal) +{ + u32 coalesce, address, is_valid; + struct cau_sb_entry sb_entry; + u8 timer_res; + int rc; + + rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + + p_cid->sb_igu_id * sizeof(u64), + (u64)(uintptr_t)&sb_entry, 2, 0); + if (rc) { + DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); + return rc; + } + + timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES0); + + address = BAR0_MAP_REG_USDM_RAM + + USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id); + coalesce = qed_rd(p_hwfn, p_ptt, address); + + is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID); + if (!is_valid) + return -EINVAL; + + coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET); + *p_rx_coal = (u16)(coalesce << timer_res); + + return 0; +} + +int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_queue_cid *p_cid, u16 *p_tx_coal) +{ + u32 coalesce, address, is_valid; + struct cau_sb_entry sb_entry; + u8 timer_res; + int rc; + + rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + + p_cid->sb_igu_id * sizeof(u64), + (u64)(uintptr_t)&sb_entry, 2, 0); + if (rc) { + DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); + return rc; + } + + timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES1); + + address = BAR0_MAP_REG_XSDM_RAM + + XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id); + coalesce = qed_rd(p_hwfn, p_ptt, address); + + is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID); + if (!is_valid) + return -EINVAL; + + coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET); + *p_tx_coal = (u16)(coalesce << timer_res); + + return 0; +} + +int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *p_coal, void *handle) +{ + struct qed_queue_cid *p_cid = handle; + struct qed_ptt *p_ptt; + int rc = 0; + + if (IS_VF(p_hwfn->cdev)) { + rc = qed_vf_pf_get_coalesce(p_hwfn, p_coal, p_cid); + if (rc) + DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n"); + + return rc; + } + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EAGAIN; + + if (p_cid->b_is_rx) { + rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, p_coal); + if (rc) + goto out; + } else { + rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, p_coal); + if (rc) + goto out; + } + +out: + qed_ptt_release(p_hwfn, p_ptt); + + return rc; +} + static int qed_fill_eth_dev_info(struct qed_dev *cdev, struct qed_dev_eth_info *info) { @@ -2696,6 +2796,20 @@ static int qed_ntuple_arfs_filter_config(struct qed_dev *cdev, void *cookie, return rc; } +static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle) +{ + struct qed_queue_cid *p_cid = handle; + struct qed_hwfn *p_hwfn; + int rc; + + p_hwfn = p_cid->p_owner; + rc = qed_get_queue_coalesce(p_hwfn, coal, handle); + if (rc) + DP_NOTICE(p_hwfn, "Unable to read queue calescing\n"); + + return rc; +} + static int qed_fp_cqe_completion(struct qed_dev *dev, u8 rss_id, struct eth_slow_path_rx_cqe *cqe) { @@ -2739,6 +2853,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .tunn_config = &qed_tunn_configure, .ntuple_filter_config = &qed_ntuple_arfs_filter_config, .configure_arfs_searcher = &qed_configure_arfs_searcher, + .get_coalesce = &qed_get_coalesce, }; const struct qed_eth_ops *qed_get_eth_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index f8f09aadced7..cc1f248551c9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -400,4 +400,20 @@ qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, u8 qed_mcast_bin_from_mac(u8 *mac); -#endif /* _QED_L2_H */ +int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 coalesce, struct qed_queue_cid *p_cid); + +int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 coalesce, struct qed_queue_cid *p_cid); + +int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_queue_cid *p_cid, u16 *p_hw_coal); + +int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_queue_cid *p_cid, u16 *p_hw_coal); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b11399606990..27832885a87f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -954,9 +954,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, struct qed_tunnel_info tunn_info; const u8 *data = NULL; struct qed_hwfn *hwfn; -#ifdef CONFIG_RFS_ACCEL struct qed_ptt *p_ptt; -#endif int rc = -EINVAL; if (qed_iov_wq_start(cdev)) @@ -972,7 +970,6 @@ static int qed_slowpath_start(struct qed_dev *cdev, goto err; } -#ifdef CONFIG_RFS_ACCEL if (cdev->num_hwfns == 1) { p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); if (p_ptt) { @@ -983,7 +980,6 @@ static int qed_slowpath_start(struct qed_dev *cdev, goto err; } } -#endif } cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS; @@ -1091,12 +1087,10 @@ err: if (IS_PF(cdev)) release_firmware(cdev->firmware); -#ifdef CONFIG_RFS_ACCEL if (IS_PF(cdev) && (cdev->num_hwfns == 1) && QED_LEADING_HWFN(cdev)->p_arfs_ptt) qed_ptt_release(QED_LEADING_HWFN(cdev), QED_LEADING_HWFN(cdev)->p_arfs_ptt); -#endif qed_iov_wq_stop(cdev, false); @@ -1111,11 +1105,9 @@ static int qed_slowpath_stop(struct qed_dev *cdev) qed_ll2_dealloc_if(cdev); if (IS_PF(cdev)) { -#ifdef CONFIG_RFS_ACCEL if (cdev->num_hwfns == 1) qed_ptt_release(QED_LEADING_HWFN(cdev), QED_LEADING_HWFN(cdev)->p_arfs_ptt); -#endif qed_free_stream_mem(cdev); if (IS_QED_ETH_IF(cdev)) qed_sriov_disable(cdev, true); @@ -1305,6 +1297,10 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) } } + if (params->override_flags & QED_LINK_OVERRIDE_EEE_CONFIG) + memcpy(&link_params->eee, ¶ms->eee, + sizeof(link_params->eee)); + rc = qed_mcp_set_link(hwfn, ptt, params->link_up); qed_ptt_release(hwfn, ptt); @@ -1491,6 +1487,21 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE || link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE) if_link->lp_caps |= QED_LM_Asym_Pause_BIT; + + if (link_caps.default_eee == QED_MCP_EEE_UNSUPPORTED) { + if_link->eee_supported = false; + } else { + if_link->eee_supported = true; + if_link->eee_active = link.eee_active; + if_link->sup_caps = link_caps.eee_speed_caps; + /* MFW clears adv_caps on eee disable; use configured value */ + if_link->eee.adv_caps = link.eee_adv_caps ? link.eee_adv_caps : + params.eee.adv_caps; + if_link->eee.lp_adv_caps = link.eee_lp_adv_caps; + if_link->eee.enable = params.eee.enable; + if_link->eee.tx_lpi_enable = params.eee.tx_lpi_enable; + if_link->eee.tx_lpi_timer = params.eee.tx_lpi_timer; + } } static void qed_get_current_link(struct qed_dev *cdev, @@ -1557,36 +1568,10 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type, return rc; } -static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal) -{ - *rx_coal = cdev->rx_coalesce_usecs; - *tx_coal = cdev->tx_coalesce_usecs; -} - static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u16 qid, u16 sb_id) + void *handle) { - struct qed_hwfn *hwfn; - struct qed_ptt *ptt; - int hwfn_index; - int status = 0; - - hwfn_index = qid % cdev->num_hwfns; - hwfn = &cdev->hwfns[hwfn_index]; - ptt = qed_ptt_acquire(hwfn); - if (!ptt) - return -EAGAIN; - - status = qed_set_rxq_coalesce(hwfn, ptt, rx_coal, - qid / cdev->num_hwfns, sb_id); - if (status) - goto out; - status = qed_set_txq_coalesce(hwfn, ptt, tx_coal, - qid / cdev->num_hwfns, sb_id); -out: - qed_ptt_release(hwfn, ptt); - - return status; + return qed_set_queue_coalesce(rx_coal, tx_coal, handle); } static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) @@ -1735,7 +1720,6 @@ const struct qed_common_ops qed_common_ops_pass = { .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, .nvm_get_image = &qed_nvm_get_image, - .get_coalesce = &qed_get_coalesce, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, .update_drv_state = &qed_update_drv_state, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9da91045d167..c1ecce6b9141 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1097,6 +1097,31 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n"); } +static void qed_mcp_read_eee_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_mcp_link_state *p_link) +{ + u32 eee_status, val; + + p_link->eee_adv_caps = 0; + p_link->eee_lp_adv_caps = 0; + eee_status = qed_rd(p_hwfn, + p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, eee_status)); + p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT); + val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET; + if (val & EEE_1G_ADV) + p_link->eee_adv_caps |= QED_EEE_1G_ADV; + if (val & EEE_10G_ADV) + p_link->eee_adv_caps |= QED_EEE_10G_ADV; + val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET; + if (val & EEE_1G_ADV) + p_link->eee_lp_adv_caps |= QED_EEE_1G_ADV; + if (val & EEE_10G_ADV) + p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV; +} + static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_reset) { @@ -1228,6 +1253,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT); + if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) + qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); + qed_link_update(p_hwfn); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); @@ -1251,6 +1279,19 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0; phy_cfg.adv_speed = params->speed.advertised_speeds; phy_cfg.loopback_mode = params->loopback_mode; + if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) { + if (params->eee.enable) + phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; + if (params->eee.tx_lpi_enable) + phy_cfg.eee_cfg |= EEE_CFG_TX_LPI; + if (params->eee.adv_caps & QED_EEE_1G_ADV) + phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G; + if (params->eee.adv_caps & QED_EEE_10G_ADV) + phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G; + phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer << + EEE_TX_TIMER_USEC_OFFSET) & + EEE_TX_TIMER_USEC_MASK; + } p_hwfn->b_drv_link_init = b_up; @@ -2822,3 +2863,28 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, p_unlock->resource = resource; } } + +int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 mcp_resp; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT, + 0, &mcp_resp, &p_hwfn->mcp_info->capabilities); + if (!rc) + DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_PROBE), + "MFW supported features: %08x\n", + p_hwfn->mcp_info->capabilities); + + return rc; +} + +int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 mcp_resp, mcp_param, features; + + features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE; + + return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT, + features, &mcp_resp, &mcp_param); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index af03b3651411..c7ec2395d1ce 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -53,15 +53,25 @@ struct qed_mcp_link_pause_params { bool forced_tx; }; +enum qed_mcp_eee_mode { + QED_MCP_EEE_DISABLED, + QED_MCP_EEE_ENABLED, + QED_MCP_EEE_UNSUPPORTED +}; + struct qed_mcp_link_params { - struct qed_mcp_link_speed_params speed; - struct qed_mcp_link_pause_params pause; - u32 loopback_mode; + struct qed_mcp_link_speed_params speed; + struct qed_mcp_link_pause_params pause; + u32 loopback_mode; + struct qed_link_eee_params eee; }; struct qed_mcp_link_capabilities { u32 speed_capabilities; bool default_speed_autoneg; + enum qed_mcp_eee_mode default_eee; + u32 eee_lpi_timer; + u8 eee_speed_caps; }; struct qed_mcp_link_state { @@ -102,6 +112,9 @@ struct qed_mcp_link_state { u8 partner_adv_pause; bool sfp_tx_fault; + bool eee_active; + u8 eee_adv_caps; + u8 eee_lp_adv_caps; }; struct qed_mcp_function_info { @@ -546,6 +559,9 @@ struct qed_mcp_info { u8 *mfw_mb_shadow; u16 mfw_mb_length; u32 mcp_hist; + + /* Capabilties negotiated with the MFW */ + u32 capabilities; }; struct qed_mcp_mb_params { @@ -925,5 +941,20 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, struct qed_resc_unlock_params *p_unlock, enum qed_resc_lock resource, bool b_is_permanent); +/** + * @brief Learn of supported MFW features; To be done during early init + * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief Inform MFW of set of features supported by driver. Should be done + * inside the content of the LOAD_REQ. + * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 2cfd3bd9a031..3f40b1de7957 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3400,6 +3400,157 @@ static void qed_iov_vf_mbx_release(struct qed_hwfn *p_hwfn, length, status); } +static void qed_iov_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_vf_info *p_vf) +{ + struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx; + struct pfvf_read_coal_resp_tlv *p_resp; + struct vfpf_read_coal_req_tlv *req; + u8 status = PFVF_STATUS_FAILURE; + struct qed_vf_queue *p_queue; + struct qed_queue_cid *p_cid; + u16 coal = 0, qid, i; + bool b_is_rx; + int rc = 0; + + mbx->offset = (u8 *)mbx->reply_virt; + req = &mbx->req_virt->read_coal_req; + + qid = req->qid; + b_is_rx = req->is_rx ? true : false; + + if (b_is_rx) { + if (!qed_iov_validate_rxq(p_hwfn, p_vf, qid, + QED_IOV_VALIDATE_Q_ENABLE)) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d]: Invalid Rx queue_id = %d\n", + p_vf->abs_vf_id, qid); + goto send_resp; + } + + p_cid = qed_iov_get_vf_rx_queue_cid(&p_vf->vf_queues[qid]); + rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, &coal); + if (rc) + goto send_resp; + } else { + if (!qed_iov_validate_txq(p_hwfn, p_vf, qid, + QED_IOV_VALIDATE_Q_ENABLE)) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d]: Invalid Tx queue_id = %d\n", + p_vf->abs_vf_id, qid); + goto send_resp; + } + for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) { + p_queue = &p_vf->vf_queues[qid]; + if ((!p_queue->cids[i].p_cid) || + (!p_queue->cids[i].b_is_tx)) + continue; + + p_cid = p_queue->cids[i].p_cid; + + rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, &coal); + if (rc) + goto send_resp; + break; + } + } + + status = PFVF_STATUS_SUCCESS; + +send_resp: + p_resp = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_COALESCE_READ, + sizeof(*p_resp)); + p_resp->coal = coal; + + qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_resp), status); +} + +static void qed_iov_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_vf_info *vf) +{ + struct qed_iov_vf_mbx *mbx = &vf->vf_mbx; + struct vfpf_update_coalesce *req; + u8 status = PFVF_STATUS_FAILURE; + struct qed_queue_cid *p_cid; + u16 rx_coal, tx_coal; + int rc = 0, i; + u16 qid; + + req = &mbx->req_virt->update_coalesce; + + rx_coal = req->rx_coal; + tx_coal = req->tx_coal; + qid = req->qid; + + if (!qed_iov_validate_rxq(p_hwfn, vf, qid, + QED_IOV_VALIDATE_Q_ENABLE) && rx_coal) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d]: Invalid Rx queue_id = %d\n", + vf->abs_vf_id, qid); + goto out; + } + + if (!qed_iov_validate_txq(p_hwfn, vf, qid, + QED_IOV_VALIDATE_Q_ENABLE) && tx_coal) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d]: Invalid Tx queue_id = %d\n", + vf->abs_vf_id, qid); + goto out; + } + + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "VF[%d]: Setting coalesce for VF rx_coal = %d, tx_coal = %d at queue = %d\n", + vf->abs_vf_id, rx_coal, tx_coal, qid); + + if (rx_coal) { + p_cid = qed_iov_get_vf_rx_queue_cid(&vf->vf_queues[qid]); + + rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid); + if (rc) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "VF[%d]: Unable to set rx queue = %d coalesce\n", + vf->abs_vf_id, vf->vf_queues[qid].fw_rx_qid); + goto out; + } + vf->rx_coal = rx_coal; + } + + if (tx_coal) { + struct qed_vf_queue *p_queue = &vf->vf_queues[qid]; + + for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) { + if (!p_queue->cids[i].p_cid) + continue; + + if (!p_queue->cids[i].b_is_tx) + continue; + + rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal, + p_queue->cids[i].p_cid); + + if (rc) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "VF[%d]: Unable to set tx queue coalesce\n", + vf->abs_vf_id); + goto out; + } + } + vf->tx_coal = tx_coal; + } + + status = PFVF_STATUS_SUCCESS; +out: + qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_COALESCE_UPDATE, + sizeof(struct pfvf_def_resp_tlv), status); +} static int qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf, struct qed_ptt *p_ptt) @@ -3725,6 +3876,12 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, case CHANNEL_TLV_UPDATE_TUNN_PARAM: qed_iov_vf_mbx_update_tunn_param(p_hwfn, p_ptt, p_vf); break; + case CHANNEL_TLV_COALESCE_UPDATE: + qed_iov_vf_pf_set_coalesce(p_hwfn, p_ptt, p_vf); + break; + case CHANNEL_TLV_COALESCE_READ: + qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf); + break; } } else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index c2e44bce398c..3955929ba892 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -217,6 +217,9 @@ struct qed_vf_info { u8 num_rxqs; u8 num_txqs; + u16 rx_coal; + u16 tx_coal; + u8 num_sbs; u8 num_mac_filters; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 1926d1ed439f..91b5e9f02a62 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1343,6 +1343,81 @@ exit: return rc; } +int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn, + u16 *p_coal, struct qed_queue_cid *p_cid) +{ + struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; + struct pfvf_read_coal_resp_tlv *resp; + struct vfpf_read_coal_req_tlv *req; + int rc; + + /* clear mailbox and prep header tlv */ + req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_READ, sizeof(*req)); + req->qid = p_cid->rel.queue_id; + req->is_rx = p_cid->b_is_rx ? 1 : 0; + + qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + resp = &p_iov->pf2vf_reply->read_coal_resp; + + rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + if (rc) + goto exit; + + if (resp->hdr.status != PFVF_STATUS_SUCCESS) + goto exit; + + *p_coal = resp->coal; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; +} + +int +qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn, + u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid) +{ + struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; + struct vfpf_update_coalesce *req; + struct pfvf_def_resp_tlv *resp; + int rc; + + /* clear mailbox and prep header tlv */ + req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_UPDATE, sizeof(*req)); + + req->rx_coal = rx_coal; + req->tx_coal = tx_coal; + req->qid = p_cid->rel.queue_id; + + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "Setting coalesce rx_coal = %d, tx_coal = %d at queue = %d\n", + rx_coal, tx_coal, req->qid); + + /* add list termination tlv */ + qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + resp = &p_iov->pf2vf_reply->default_resp; + rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + if (rc) + goto exit; + + if (resp->hdr.status != PFVF_STATUS_SUCCESS) + goto exit; + + if (rx_coal) + p_hwfn->cdev->rx_coalesce_usecs = rx_coal; + + if (tx_coal) + p_hwfn->cdev->tx_coalesce_usecs = tx_coal; + +exit: + qed_vf_pf_req_end(p_hwfn, rc); + return rc; +} + u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) { struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 34d9b882a780..97d44dfb38ca 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -497,6 +497,27 @@ struct tlv_buffer_size { u8 tlv_buffer[TLV_BUFFER_SIZE]; }; +struct vfpf_update_coalesce { + struct vfpf_first_tlv first_tlv; + u16 rx_coal; + u16 tx_coal; + u16 qid; + u8 padding[2]; +}; + +struct vfpf_read_coal_req_tlv { + struct vfpf_first_tlv first_tlv; + u16 qid; + u8 is_rx; + u8 padding[5]; +}; + +struct pfvf_read_coal_resp_tlv { + struct pfvf_tlv hdr; + u16 coal; + u8 padding[6]; +}; + union vfpf_tlvs { struct vfpf_first_tlv first_tlv; struct vfpf_acquire_tlv acquire; @@ -509,7 +530,8 @@ union vfpf_tlvs { struct vfpf_vport_update_tlv vport_update; struct vfpf_ucast_filter_tlv ucast_filter; struct vfpf_update_tunn_param_tlv tunn_param_update; - struct channel_list_end_tlv list_end; + struct vfpf_update_coalesce update_coalesce; + struct vfpf_read_coal_req_tlv read_coal_req; struct tlv_buffer_size tlv_buf_size; }; @@ -519,6 +541,7 @@ union pfvf_tlvs { struct tlv_buffer_size tlv_buf_size; struct pfvf_start_queue_resp_tlv queue_start; struct pfvf_update_tunn_param_tlv tunn_param_resp; + struct pfvf_read_coal_resp_tlv read_coal_resp; }; enum qed_bulletin_bit { @@ -624,8 +647,9 @@ enum { CHANNEL_TLV_VPORT_UPDATE_ACCEPT_ANY_VLAN, CHANNEL_TLV_VPORT_UPDATE_SGE_TPA, CHANNEL_TLV_UPDATE_TUNN_PARAM, - CHANNEL_TLV_RESERVED, + CHANNEL_TLV_COALESCE_UPDATE, CHANNEL_TLV_QID, + CHANNEL_TLV_COALESCE_READ, CHANNEL_TLV_MAX, /* Required for iterating over vport-update tlvs. @@ -677,6 +701,31 @@ struct qed_vf_iov { bool b_doorbell_bar; }; +/** + * @brief VF - Set Rx/Tx coalesce per VF's relative queue. + * Coalesce value '0' will omit the configuration. + * + * @param p_hwfn + * @param rx_coal - coalesce value in micro second for rx queue + * @param tx_coal - coalesce value in micro second for tx queue + * @param p_cid - queue cid + * + **/ +int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn, + u16 rx_coal, + u16 tx_coal, struct qed_queue_cid *p_cid); + +/** + * @brief VF - Get coalesce per VF's relative queue. + * + * @param p_hwfn + * @param p_coal - coalesce value in micro second for VF queues. + * @param p_cid - queue cid + * + **/ +int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn, + u16 *p_coal, struct qed_queue_cid *p_cid); + #ifdef CONFIG_QED_SRIOV /** * @brief Read the VF bulletin and act on it if needed diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 4dfb238221f9..adb700512baa 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -160,6 +160,8 @@ struct qede_rdma_dev { struct qede_ptp; +#define QEDE_RFS_MAX_FLTR 256 + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -241,9 +243,7 @@ struct qede_dev { u16 vxlan_dst_port; u16 geneve_dst_port; -#ifdef CONFIG_RFS_ACCEL struct qede_arfs *arfs; -#endif bool wol_enabled; struct qede_rdma_dev rdma_info; @@ -447,16 +447,21 @@ struct qede_fastpath { #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); +#define QEDE_SP_ARFS_CONFIG 4 +#define QEDE_SP_TASK_POLL_DELAY (5 * HZ) +#endif + void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr); void qede_poll_for_freeing_arfs_filters(struct qede_dev *edev); void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc); void qede_free_arfs(struct qede_dev *edev); int qede_alloc_arfs(struct qede_dev *edev); - -#define QEDE_SP_ARFS_CONFIG 4 -#define QEDE_SP_TASK_POLL_DELAY (5 * HZ) -#define QEDE_RFS_MAX_FLTR 256 -#endif +int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info); +int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info); +int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd); +int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info, + u32 *rule_locs); +int qede_get_arfs_filter_count(struct qede_dev *edev); struct qede_reload_args { void (*func)(struct qede_dev *edev, struct qede_reload_args *args); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 6a03d3e66cff..dae741270022 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -702,24 +702,62 @@ static u32 qede_get_link(struct net_device *dev) static int qede_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { + void *rx_handle = NULL, *tx_handle = NULL; struct qede_dev *edev = netdev_priv(dev); - u16 rxc, txc; + u16 rx_coal, tx_coal, i, rc = 0; + struct qede_fastpath *fp; + + rx_coal = QED_DEFAULT_RX_USECS; + tx_coal = QED_DEFAULT_TX_USECS; memset(coal, 0, sizeof(struct ethtool_coalesce)); - edev->ops->common->get_coalesce(edev->cdev, &rxc, &txc); - coal->rx_coalesce_usecs = rxc; - coal->tx_coalesce_usecs = txc; + __qede_lock(edev); + if (edev->state == QEDE_STATE_OPEN) { + for_each_queue(i) { + fp = &edev->fp_array[i]; - return 0; + if (fp->type & QEDE_FASTPATH_RX) { + rx_handle = fp->rxq->handle; + break; + } + } + + rc = edev->ops->get_coalesce(edev->cdev, &rx_coal, rx_handle); + if (rc) { + DP_INFO(edev, "Read Rx coalesce error\n"); + goto out; + } + + for_each_queue(i) { + fp = &edev->fp_array[i]; + if (fp->type & QEDE_FASTPATH_TX) { + tx_handle = fp->txq->handle; + break; + } + } + + rc = edev->ops->get_coalesce(edev->cdev, &tx_coal, tx_handle); + if (rc) + DP_INFO(edev, "Read Tx coalesce error\n"); + } + +out: + __qede_unlock(edev); + + coal->rx_coalesce_usecs = rx_coal; + coal->tx_coalesce_usecs = tx_coal; + + return rc; } static int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct qede_dev *edev = netdev_priv(dev); + struct qede_fastpath *fp; int i, rc = 0; - u16 rxc, txc, sb_id; + u16 rxc, txc; if (!netif_running(dev)) { DP_INFO(edev, "Interface is down\n"); @@ -730,21 +768,36 @@ static int qede_set_coalesce(struct net_device *dev, coal->tx_coalesce_usecs > QED_COALESCE_MAX) { DP_INFO(edev, "Can't support requested %s coalesce value [max supported value %d]\n", - coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx" - : "tx", - QED_COALESCE_MAX); + coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx" : + "tx", QED_COALESCE_MAX); return -EINVAL; } rxc = (u16)coal->rx_coalesce_usecs; txc = (u16)coal->tx_coalesce_usecs; for_each_queue(i) { - sb_id = edev->fp_array[i].sb_info->igu_sb_id; - rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc, - (u16)i, sb_id); - if (rc) { - DP_INFO(edev, "Set coalesce error, rc = %d\n", rc); - return rc; + fp = &edev->fp_array[i]; + + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + rc = edev->ops->common->set_coalesce(edev->cdev, + rxc, 0, + fp->rxq->handle); + if (rc) { + DP_INFO(edev, + "Set RX coalesce error, rc = %d\n", rc); + return rc; + } + } + + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + rc = edev->ops->common->set_coalesce(edev->cdev, + 0, txc, + fp->txq->handle); + if (rc) { + DP_INFO(edev, + "Set TX coalesce error, rc = %d\n", rc); + return rc; + } } } @@ -1045,20 +1098,34 @@ static int qede_get_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info) } static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, - u32 *rules __always_unused) + u32 *rule_locs) { struct qede_dev *edev = netdev_priv(dev); + int rc = 0; switch (info->cmd) { case ETHTOOL_GRXRINGS: info->data = QEDE_RSS_COUNT(edev); - return 0; + break; case ETHTOOL_GRXFH: - return qede_get_rss_flags(edev, info); + rc = qede_get_rss_flags(edev, info); + break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = qede_get_arfs_filter_count(edev); + info->data = QEDE_RFS_MAX_FLTR; + break; + case ETHTOOL_GRXCLSRULE: + rc = qede_get_cls_rule_entry(edev, info); + break; + case ETHTOOL_GRXCLSRLALL: + rc = qede_get_cls_rule_all(edev, info, rule_locs); + break; default: DP_ERR(edev, "Command parameters not supported\n"); - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; } + + return rc; } static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info) @@ -1168,14 +1235,24 @@ static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info) static int qede_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) { struct qede_dev *edev = netdev_priv(dev); + int rc; switch (info->cmd) { case ETHTOOL_SRXFH: - return qede_set_rss_flags(edev, info); + rc = qede_set_rss_flags(edev, info); + break; + case ETHTOOL_SRXCLSRLINS: + rc = qede_add_cls_rule(edev, info); + break; + case ETHTOOL_SRXCLSRLDEL: + rc = qede_del_cls_rule(edev, info); + break; default: DP_INFO(edev, "Command parameters not supported\n"); - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; } + + return rc; } static u32 qede_get_rxfh_indir_size(struct net_device *dev) @@ -1607,6 +1684,87 @@ static int qede_get_tunable(struct net_device *dev, return 0; } +static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + if (!current_link.eee_supported) { + DP_INFO(edev, "EEE is not supported\n"); + return -EOPNOTSUPP; + } + + if (current_link.eee.adv_caps & QED_EEE_1G_ADV) + edata->advertised = ADVERTISED_1000baseT_Full; + if (current_link.eee.adv_caps & QED_EEE_10G_ADV) + edata->advertised |= ADVERTISED_10000baseT_Full; + if (current_link.sup_caps & QED_EEE_1G_ADV) + edata->supported = ADVERTISED_1000baseT_Full; + if (current_link.sup_caps & QED_EEE_10G_ADV) + edata->supported |= ADVERTISED_10000baseT_Full; + if (current_link.eee.lp_adv_caps & QED_EEE_1G_ADV) + edata->lp_advertised = ADVERTISED_1000baseT_Full; + if (current_link.eee.lp_adv_caps & QED_EEE_10G_ADV) + edata->lp_advertised |= ADVERTISED_10000baseT_Full; + + edata->tx_lpi_timer = current_link.eee.tx_lpi_timer; + edata->eee_enabled = current_link.eee.enable; + edata->tx_lpi_enabled = current_link.eee.tx_lpi_enable; + edata->eee_active = current_link.eee_active; + + return 0; +} + +static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + struct qed_link_params params; + + if (!edev->ops->common->can_link_change(edev->cdev)) { + DP_INFO(edev, "Link settings are not allowed to be changed\n"); + return -EOPNOTSUPP; + } + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + if (!current_link.eee_supported) { + DP_INFO(edev, "EEE is not supported\n"); + return -EOPNOTSUPP; + } + + memset(¶ms, 0, sizeof(params)); + params.override_flags |= QED_LINK_OVERRIDE_EEE_CONFIG; + + if (!(edata->advertised & (ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full)) || + ((edata->advertised & (ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full)) != + edata->advertised)) { + DP_VERBOSE(edev, QED_MSG_DEBUG, + "Invalid advertised capabilities %d\n", + edata->advertised); + return -EINVAL; + } + + if (edata->advertised & ADVERTISED_1000baseT_Full) + params.eee.adv_caps = QED_EEE_1G_ADV; + if (edata->advertised & ADVERTISED_10000baseT_Full) + params.eee.adv_caps |= QED_EEE_10G_ADV; + params.eee.enable = edata->eee_enabled; + params.eee.tx_lpi_enable = edata->tx_lpi_enabled; + params.eee.tx_lpi_timer = edata->tx_lpi_timer; + + params.link_up = true; + edev->ops->common->set_link(edev->cdev, ¶ms); + + return 0; +} + static const struct ethtool_ops qede_ethtool_ops = { .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, @@ -1640,6 +1798,9 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_channels = qede_get_channels, .set_channels = qede_set_channels, .self_test = qede_self_test, + .get_eee = qede_get_eee, + .set_eee = qede_set_eee, + .get_tunable = qede_get_tunable, .set_tunable = qede_set_tunable, }; @@ -1650,6 +1811,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = { .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .get_link = qede_get_link, + .get_coalesce = qede_get_coalesce, + .set_coalesce = qede_set_coalesce, .get_ringparam = qede_get_ringparam, .set_ringparam = qede_set_ringparam, .get_strings = qede_get_strings, diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f939db5bac5f..f79e36e4060a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -38,7 +38,6 @@ #include <linux/qed/qed_if.h> #include "qede.h" -#ifdef CONFIG_RFS_ACCEL struct qede_arfs_tuple { union { __be32 src_ipv4; @@ -76,10 +75,12 @@ struct qede_arfs_fltr_node { u16 next_rxq_id; bool filter_op; bool used; + u8 fw_rc; struct hlist_node node; }; struct qede_arfs { +#define QEDE_ARFS_BUCKET_HEAD(edev, idx) (&(edev)->arfs->arfs_hl_head[idx]) #define QEDE_ARFS_POLL_COUNT 100 #define QEDE_RFS_FLW_BITSHIFT (4) #define QEDE_RFS_FLW_MASK ((1 << QEDE_RFS_FLW_BITSHIFT) - 1) @@ -121,11 +122,56 @@ qede_free_arfs_filter(struct qede_dev *edev, struct qede_arfs_fltr_node *fltr) kfree(fltr); } +static int +qede_enqueue_fltr_and_config_searcher(struct qede_dev *edev, + struct qede_arfs_fltr_node *fltr, + u16 bucket_idx) +{ + fltr->mapping = dma_map_single(&edev->pdev->dev, fltr->data, + fltr->buf_len, DMA_TO_DEVICE); + if (dma_mapping_error(&edev->pdev->dev, fltr->mapping)) { + DP_NOTICE(edev, "Failed to map DMA memory for rule\n"); + qede_free_arfs_filter(edev, fltr); + return -ENOMEM; + } + + INIT_HLIST_NODE(&fltr->node); + hlist_add_head(&fltr->node, + QEDE_ARFS_BUCKET_HEAD(edev, bucket_idx)); + edev->arfs->filter_count++; + + if (edev->arfs->filter_count == 1 && !edev->arfs->enable) { + edev->ops->configure_arfs_searcher(edev->cdev, true); + edev->arfs->enable = true; + } + + return 0; +} + +static void +qede_dequeue_fltr_and_config_searcher(struct qede_dev *edev, + struct qede_arfs_fltr_node *fltr) +{ + hlist_del(&fltr->node); + dma_unmap_single(&edev->pdev->dev, fltr->mapping, + fltr->buf_len, DMA_TO_DEVICE); + + qede_free_arfs_filter(edev, fltr); + edev->arfs->filter_count--; + + if (!edev->arfs->filter_count && edev->arfs->enable) { + edev->arfs->enable = false; + edev->ops->configure_arfs_searcher(edev->cdev, false); + } +} + void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc) { struct qede_arfs_fltr_node *fltr = filter; struct qede_dev *edev = dev; + fltr->fw_rc = fw_rc; + if (fw_rc) { DP_NOTICE(edev, "Failed arfs filter configuration fw_rc=%d, flow_id=%d, sw_id=%d, src_port=%d, dst_port=%d, rxq=%d\n", @@ -185,18 +231,17 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr) if ((!test_bit(QEDE_FLTR_VALID, &fltr->state) && !fltr->used) || free_fltr) { - hlist_del(&fltr->node); - dma_unmap_single(&edev->pdev->dev, - fltr->mapping, - fltr->buf_len, DMA_TO_DEVICE); - qede_free_arfs_filter(edev, fltr); - edev->arfs->filter_count--; + qede_dequeue_fltr_and_config_searcher(edev, + fltr); } else { - if ((rps_may_expire_flow(edev->ndev, - fltr->rxq_id, - fltr->flow_id, - fltr->sw_id) || del) && - !free_fltr) + bool flow_exp = false; +#ifdef CONFIG_RFS_ACCEL + flow_exp = rps_may_expire_flow(edev->ndev, + fltr->rxq_id, + fltr->flow_id, + fltr->sw_id); +#endif + if ((flow_exp || del) && !free_fltr) qede_configure_arfs_fltr(edev, fltr, fltr->rxq_id, false); @@ -213,10 +258,12 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr) edev->arfs->enable = false; edev->ops->configure_arfs_searcher(edev->cdev, false); } +#ifdef CONFIG_RFS_ACCEL } else { set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags); schedule_delayed_work(&edev->sp_task, QEDE_SP_TASK_POLL_DELAY); +#endif } spin_unlock_bh(&edev->arfs->arfs_list_lock); @@ -258,25 +305,26 @@ int qede_alloc_arfs(struct qede_dev *edev) spin_lock_init(&edev->arfs->arfs_list_lock); for (i = 0; i <= QEDE_RFS_FLW_MASK; i++) - INIT_HLIST_HEAD(&edev->arfs->arfs_hl_head[i]); + INIT_HLIST_HEAD(QEDE_ARFS_BUCKET_HEAD(edev, i)); - edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev)); - if (!edev->ndev->rx_cpu_rmap) { + edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) * + sizeof(long)); + if (!edev->arfs->arfs_fltr_bmap) { vfree(edev->arfs); edev->arfs = NULL; return -ENOMEM; } - edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) * - sizeof(long)); - if (!edev->arfs->arfs_fltr_bmap) { - free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap); - edev->ndev->rx_cpu_rmap = NULL; +#ifdef CONFIG_RFS_ACCEL + edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev)); + if (!edev->ndev->rx_cpu_rmap) { + vfree(edev->arfs->arfs_fltr_bmap); + edev->arfs->arfs_fltr_bmap = NULL; vfree(edev->arfs); edev->arfs = NULL; return -ENOMEM; } - +#endif return 0; } @@ -285,16 +333,19 @@ void qede_free_arfs(struct qede_dev *edev) if (!edev->arfs) return; +#ifdef CONFIG_RFS_ACCEL if (edev->ndev->rx_cpu_rmap) free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap); edev->ndev->rx_cpu_rmap = NULL; +#endif vfree(edev->arfs->arfs_fltr_bmap); edev->arfs->arfs_fltr_bmap = NULL; vfree(edev->arfs); edev->arfs = NULL; } +#ifdef CONFIG_RFS_ACCEL static bool qede_compare_ip_addr(struct qede_arfs_fltr_node *tpos, const struct sk_buff *skb) { @@ -394,9 +445,8 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, spin_lock_bh(&edev->arfs->arfs_list_lock); - n = qede_arfs_htbl_key_search(&edev->arfs->arfs_hl_head[tbl_idx], + n = qede_arfs_htbl_key_search(QEDE_ARFS_BUCKET_HEAD(edev, tbl_idx), skb, ports[0], ports[1], ip_proto); - if (n) { /* Filter match */ n->next_rxq_id = rxq_index; @@ -448,23 +498,9 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, n->tuple.ip_proto = ip_proto; memcpy(n->data + ETH_HLEN, skb->data, skb_headlen(skb)); - n->mapping = dma_map_single(&edev->pdev->dev, n->data, - n->buf_len, DMA_TO_DEVICE); - if (dma_mapping_error(&edev->pdev->dev, n->mapping)) { - DP_NOTICE(edev, "Failed to map DMA memory for arfs\n"); - qede_free_arfs_filter(edev, n); - rc = -ENOMEM; + rc = qede_enqueue_fltr_and_config_searcher(edev, n, tbl_idx); + if (rc) goto ret_unlock; - } - - INIT_HLIST_NODE(&n->node); - hlist_add_head(&n->node, &edev->arfs->arfs_hl_head[tbl_idx]); - edev->arfs->filter_count++; - - if (edev->arfs->filter_count == 1 && !edev->arfs->enable) { - edev->ops->configure_arfs_searcher(edev->cdev, true); - edev->arfs->enable = true; - } qede_configure_arfs_fltr(edev, n, n->rxq_id, true); @@ -472,6 +508,7 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags); schedule_delayed_work(&edev->sp_task, 0); + return n->sw_id; ret_unlock: @@ -1263,3 +1300,371 @@ void qede_config_rx_mode(struct net_device *ndev) out: kfree(uc_macs); } + +static struct qede_arfs_fltr_node * +qede_get_arfs_fltr_by_loc(struct hlist_head *head, u32 location) +{ + struct qede_arfs_fltr_node *fltr; + + hlist_for_each_entry(fltr, head, node) + if (location == fltr->sw_id) + return fltr; + + return NULL; +} + +static bool +qede_compare_user_flow_ips(struct qede_arfs_fltr_node *tpos, + struct ethtool_rx_flow_spec *fsp, + __be16 proto) +{ + if (proto == htons(ETH_P_IP)) { + struct ethtool_tcpip4_spec *ip; + + ip = &fsp->h_u.tcp_ip4_spec; + + if (tpos->tuple.src_ipv4 == ip->ip4src && + tpos->tuple.dst_ipv4 == ip->ip4dst) + return true; + else + return false; + } else { + struct ethtool_tcpip6_spec *ip6; + struct in6_addr *src; + + ip6 = &fsp->h_u.tcp_ip6_spec; + src = &tpos->tuple.src_ipv6; + + if (!memcmp(src, &ip6->ip6src, sizeof(struct in6_addr)) && + !memcmp(&tpos->tuple.dst_ipv6, &ip6->ip6dst, + sizeof(struct in6_addr))) + return true; + else + return false; + } + return false; +} + +int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct qede_arfs_fltr_node *fltr; + struct hlist_head *head; + int cnt = 0, rc = 0; + + info->data = QEDE_RFS_MAX_FLTR; + + __qede_lock(edev); + + if (!edev->arfs) { + rc = -EPERM; + goto unlock; + } + + head = QEDE_ARFS_BUCKET_HEAD(edev, 0); + + hlist_for_each_entry(fltr, head, node) { + if (cnt == info->rule_cnt) { + rc = -EMSGSIZE; + goto unlock; + } + + rule_locs[cnt] = fltr->sw_id; + cnt++; + } + + info->rule_cnt = cnt; + +unlock: + __qede_unlock(edev); + return rc; +} + +int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct qede_arfs_fltr_node *fltr = NULL; + int rc = 0; + + cmd->data = QEDE_RFS_MAX_FLTR; + + __qede_lock(edev); + + if (!edev->arfs) { + rc = -EPERM; + goto unlock; + } + + fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0), + fsp->location); + if (!fltr) { + DP_NOTICE(edev, "Rule not found - location=0x%x\n", + fsp->location); + rc = -EINVAL; + goto unlock; + } + + if (fltr->tuple.eth_proto == htons(ETH_P_IP)) { + if (fltr->tuple.ip_proto == IPPROTO_TCP) + fsp->flow_type = TCP_V4_FLOW; + else + fsp->flow_type = UDP_V4_FLOW; + + fsp->h_u.tcp_ip4_spec.psrc = fltr->tuple.src_port; + fsp->h_u.tcp_ip4_spec.pdst = fltr->tuple.dst_port; + fsp->h_u.tcp_ip4_spec.ip4src = fltr->tuple.src_ipv4; + fsp->h_u.tcp_ip4_spec.ip4dst = fltr->tuple.dst_ipv4; + } else { + if (fltr->tuple.ip_proto == IPPROTO_TCP) + fsp->flow_type = TCP_V6_FLOW; + else + fsp->flow_type = UDP_V6_FLOW; + fsp->h_u.tcp_ip6_spec.psrc = fltr->tuple.src_port; + fsp->h_u.tcp_ip6_spec.pdst = fltr->tuple.dst_port; + memcpy(&fsp->h_u.tcp_ip6_spec.ip6src, + &fltr->tuple.src_ipv6, sizeof(struct in6_addr)); + memcpy(&fsp->h_u.tcp_ip6_spec.ip6dst, + &fltr->tuple.dst_ipv6, sizeof(struct in6_addr)); + } + + fsp->ring_cookie = fltr->rxq_id; + +unlock: + __qede_unlock(edev); + return rc; +} + +static int +qede_validate_and_check_flow_exist(struct qede_dev *edev, + struct ethtool_rx_flow_spec *fsp, + int *min_hlen) +{ + __be16 src_port = 0x0, dst_port = 0x0; + struct qede_arfs_fltr_node *fltr; + struct hlist_node *temp; + struct hlist_head *head; + __be16 eth_proto; + u8 ip_proto; + + if (fsp->location >= QEDE_RFS_MAX_FLTR || + fsp->ring_cookie >= QEDE_RSS_COUNT(edev)) + return -EINVAL; + + if (fsp->flow_type == TCP_V4_FLOW) { + *min_hlen += sizeof(struct iphdr) + + sizeof(struct tcphdr); + eth_proto = htons(ETH_P_IP); + ip_proto = IPPROTO_TCP; + } else if (fsp->flow_type == UDP_V4_FLOW) { + *min_hlen += sizeof(struct iphdr) + + sizeof(struct udphdr); + eth_proto = htons(ETH_P_IP); + ip_proto = IPPROTO_UDP; + } else if (fsp->flow_type == TCP_V6_FLOW) { + *min_hlen += sizeof(struct ipv6hdr) + + sizeof(struct tcphdr); + eth_proto = htons(ETH_P_IPV6); + ip_proto = IPPROTO_TCP; + } else if (fsp->flow_type == UDP_V6_FLOW) { + *min_hlen += sizeof(struct ipv6hdr) + + sizeof(struct udphdr); + eth_proto = htons(ETH_P_IPV6); + ip_proto = IPPROTO_UDP; + } else { + DP_NOTICE(edev, "Unsupported flow type = 0x%x\n", + fsp->flow_type); + return -EPROTONOSUPPORT; + } + + if (eth_proto == htons(ETH_P_IP)) { + src_port = fsp->h_u.tcp_ip4_spec.psrc; + dst_port = fsp->h_u.tcp_ip4_spec.pdst; + } else { + src_port = fsp->h_u.tcp_ip6_spec.psrc; + dst_port = fsp->h_u.tcp_ip6_spec.pdst; + } + + head = QEDE_ARFS_BUCKET_HEAD(edev, 0); + hlist_for_each_entry_safe(fltr, temp, head, node) { + if ((fltr->tuple.ip_proto == ip_proto && + fltr->tuple.eth_proto == eth_proto && + qede_compare_user_flow_ips(fltr, fsp, eth_proto) && + fltr->tuple.src_port == src_port && + fltr->tuple.dst_port == dst_port) || + fltr->sw_id == fsp->location) + return -EEXIST; + } + + return 0; +} + +static int +qede_poll_arfs_filter_config(struct qede_dev *edev, + struct qede_arfs_fltr_node *fltr) +{ + int count = QEDE_ARFS_POLL_COUNT; + + while (fltr->used && count) { + msleep(20); + count--; + } + + if (count == 0 || fltr->fw_rc) { + qede_dequeue_fltr_and_config_searcher(edev, fltr); + return -EIO; + } + + return fltr->fw_rc; +} + +int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) +{ + struct ethtool_rx_flow_spec *fsp = &info->fs; + struct qede_arfs_fltr_node *n; + int min_hlen = ETH_HLEN, rc; + struct ethhdr *eth; + struct iphdr *ip; + __be16 *ports; + + __qede_lock(edev); + + if (!edev->arfs) { + rc = -EPERM; + goto unlock; + } + + rc = qede_validate_and_check_flow_exist(edev, fsp, &min_hlen); + if (rc) + goto unlock; + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) { + rc = -ENOMEM; + goto unlock; + } + + n->data = kzalloc(min_hlen, GFP_KERNEL); + if (!n->data) { + kfree(n); + rc = -ENOMEM; + goto unlock; + } + + n->sw_id = fsp->location; + set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap); + n->buf_len = min_hlen; + n->rxq_id = fsp->ring_cookie; + n->next_rxq_id = n->rxq_id; + eth = (struct ethhdr *)n->data; + + if (info->fs.flow_type == TCP_V4_FLOW || + info->fs.flow_type == UDP_V4_FLOW) { + ports = (__be16 *)(n->data + ETH_HLEN + + sizeof(struct iphdr)); + eth->h_proto = htons(ETH_P_IP); + n->tuple.eth_proto = htons(ETH_P_IP); + n->tuple.src_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4src; + n->tuple.dst_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4dst; + n->tuple.src_port = info->fs.h_u.tcp_ip4_spec.psrc; + n->tuple.dst_port = info->fs.h_u.tcp_ip4_spec.pdst; + ports[0] = n->tuple.src_port; + ports[1] = n->tuple.dst_port; + ip = (struct iphdr *)(n->data + ETH_HLEN); + ip->saddr = info->fs.h_u.tcp_ip4_spec.ip4src; + ip->daddr = info->fs.h_u.tcp_ip4_spec.ip4dst; + ip->version = 0x4; + ip->ihl = 0x5; + + if (info->fs.flow_type == TCP_V4_FLOW) { + n->tuple.ip_proto = IPPROTO_TCP; + ip->protocol = IPPROTO_TCP; + } else { + n->tuple.ip_proto = IPPROTO_UDP; + ip->protocol = IPPROTO_UDP; + } + ip->tot_len = cpu_to_be16(min_hlen - ETH_HLEN); + } else { + struct ipv6hdr *ip6; + + ip6 = (struct ipv6hdr *)(n->data + ETH_HLEN); + ports = (__be16 *)(n->data + ETH_HLEN + + sizeof(struct ipv6hdr)); + eth->h_proto = htons(ETH_P_IPV6); + n->tuple.eth_proto = htons(ETH_P_IPV6); + memcpy(&n->tuple.src_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&n->tuple.dst_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + n->tuple.src_port = info->fs.h_u.tcp_ip6_spec.psrc; + n->tuple.dst_port = info->fs.h_u.tcp_ip6_spec.pdst; + ports[0] = n->tuple.src_port; + ports[1] = n->tuple.dst_port; + memcpy(&ip6->saddr, &n->tuple.src_ipv6, + sizeof(struct in6_addr)); + memcpy(&ip6->daddr, &n->tuple.dst_ipv6, + sizeof(struct in6_addr)); + ip6->version = 0x6; + + if (info->fs.flow_type == TCP_V6_FLOW) { + n->tuple.ip_proto = IPPROTO_TCP; + ip6->nexthdr = NEXTHDR_TCP; + ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr)); + } else { + n->tuple.ip_proto = IPPROTO_UDP; + ip6->nexthdr = NEXTHDR_UDP; + ip6->payload_len = cpu_to_be16(sizeof(struct udphdr)); + } + } + + rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0); + if (rc) + goto unlock; + + qede_configure_arfs_fltr(edev, n, n->rxq_id, true); + rc = qede_poll_arfs_filter_config(edev, n); +unlock: + __qede_unlock(edev); + return rc; +} + +int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) +{ + struct ethtool_rx_flow_spec *fsp = &info->fs; + struct qede_arfs_fltr_node *fltr = NULL; + int rc = -EPERM; + + __qede_lock(edev); + if (!edev->arfs) + goto unlock; + + fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0), + fsp->location); + if (!fltr) + goto unlock; + + qede_configure_arfs_fltr(edev, fltr, fltr->rxq_id, false); + + rc = qede_poll_arfs_filter_config(edev, fltr); + if (rc == 0) + qede_dequeue_fltr_and_config_searcher(edev, fltr); + +unlock: + __qede_unlock(edev); + return rc; +} + +int qede_get_arfs_filter_count(struct qede_dev *edev) +{ + int count = 0; + + __qede_lock(edev); + + if (!edev->arfs) + goto unlock; + + count = edev->arfs->filter_count; + +unlock: + __qede_unlock(edev); + return count; +} diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 06ca13dd9ddb..e5ee9f274a71 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -873,9 +873,7 @@ static void qede_update_pf_params(struct qed_dev *cdev) */ pf_params.eth_pf_params.num_vf_cons = 48; -#ifdef CONFIG_RFS_ACCEL pf_params.eth_pf_params.num_arfs_filters = QEDE_RFS_MAX_FLTR; -#endif qed_ops->common->update_pf_params(cdev, &pf_params); } @@ -1984,12 +1982,12 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); -#ifdef CONFIG_RFS_ACCEL + if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { qede_poll_for_freeing_arfs_filters(edev); qede_free_arfs(edev); } -#endif + /* Release the interrupts */ qede_sync_free_irqs(edev); edev->ops->common->set_fp_int(edev->cdev, 0); @@ -2041,13 +2039,12 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, if (rc) goto err2; -#ifdef CONFIG_RFS_ACCEL if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { rc = qede_alloc_arfs(edev); if (rc) DP_NOTICE(edev, "aRFS memory allocation failed\n"); } -#endif + qede_napi_add_enable(edev); DP_INFO(edev, "Napi added and enabled\n"); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 0844b7c75767..afa10a163da1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -1285,7 +1285,7 @@ flash_temp: int qlcnic_dump_fw(struct qlcnic_adapter *adapter) { struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; - static const struct qlcnic_dump_operations *fw_dump_ops; + const struct qlcnic_dump_operations *fw_dump_ops; struct qlcnic_83xx_dump_template_hdr *hdr_83xx; u32 entry_offset, dump, no_entries, buf_offset = 0; int i, k, ops_cnt, ops_index, dump_size = 0; diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c905971c5f3a..d3f96a8f743b 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -746,59 +746,171 @@ static const char *efx_mcdi_phy_test_name(struct efx_nic *efx, return NULL; } -#define SFP_PAGE_SIZE 128 -#define SFP_NUM_PAGES 2 -static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, - struct ethtool_eeprom *ee, u8 *data) +#define SFP_PAGE_SIZE 128 +#define SFF_DIAG_TYPE_OFFSET 92 +#define SFF_DIAG_ADDR_CHANGE BIT(2) +#define SFF_8079_NUM_PAGES 2 +#define SFF_8472_NUM_PAGES 4 +#define SFF_8436_NUM_PAGES 5 +#define SFF_DMT_LEVEL_OFFSET 94 + +/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom + * @efx: NIC context + * @page: EEPROM page number + * @data: Destination data pointer + * @offset: Offset in page to copy from in to data + * @space: Space available in data + * + * Return: + * >=0 - amount of data copied + * <0 - error + */ +static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx, + unsigned int page, + u8 *data, ssize_t offset, + ssize_t space) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX); MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN); size_t outlen; - int rc; unsigned int payload_len; - unsigned int space_remaining = ee->len; - unsigned int page; - unsigned int page_off; unsigned int to_copy; - u8 *user_data = data; + int rc; - BUILD_BUG_ON(SFP_PAGE_SIZE * SFP_NUM_PAGES != ETH_MODULE_SFF_8079_LEN); + if (offset > SFP_PAGE_SIZE) + return -EINVAL; - page_off = ee->offset % SFP_PAGE_SIZE; - page = ee->offset / SFP_PAGE_SIZE; + to_copy = min(space, SFP_PAGE_SIZE - offset); - while (space_remaining && (page < SFP_NUM_PAGES)) { - MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO, + inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), + &outlen); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_MEDIA_INFO, - inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), - &outlen); - if (rc) - return rc; + if (rc) + return rc; + + if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + + SFP_PAGE_SIZE)) + return -EIO; + + payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN); + if (payload_len != SFP_PAGE_SIZE) + return -EIO; - if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + - SFP_PAGE_SIZE)) - return -EIO; + memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset, + to_copy); - payload_len = MCDI_DWORD(outbuf, - GET_PHY_MEDIA_INFO_OUT_DATALEN); - if (payload_len != SFP_PAGE_SIZE) - return -EIO; + return to_copy; +} - /* Copy as much as we can into data */ - payload_len -= page_off; - to_copy = (space_remaining < payload_len) ? - space_remaining : payload_len; +static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx, + unsigned int page, + u8 byte) +{ + int rc; + u8 data; - memcpy(user_data, - MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + page_off, - to_copy); + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1); + if (rc == 1) + return data; + + return rc; +} + +static int efx_mcdi_phy_diag_type(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the diagnostic type at byte 92. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DIAG_TYPE_OFFSET); +} - space_remaining -= to_copy; - user_data += to_copy; - page_off = 0; - page++; +static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the DMT level at byte 94. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DMT_LEVEL_OFFSET); +} + +static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS) + return phy_data->media; + + /* A QSFP+ NIC may actually have an SFP+ module attached. + * The ID is page 0, byte 0. + */ + switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { + case 0x3: + return MC_CMD_MEDIA_SFP_PLUS; + case 0xc: + case 0xd: + return MC_CMD_MEDIA_QSFP_PLUS; + default: + return 0; + } +} + +static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, + struct ethtool_eeprom *ee, u8 *data) +{ + int rc; + ssize_t space_remaining = ee->len; + unsigned int page_off; + bool ignore_missing; + int num_pages; + int page; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ? + SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES; + page = 0; + ignore_missing = false; + break; + case MC_CMD_MEDIA_QSFP_PLUS: + num_pages = SFF_8436_NUM_PAGES; + page = -1; /* We obtain the lower page by asking for -1. */ + ignore_missing = true; /* Ignore missing pages after page 0. */ + break; + default: + return -EOPNOTSUPP; + } + + page_off = ee->offset % SFP_PAGE_SIZE; + page += ee->offset / SFP_PAGE_SIZE; + + while (space_remaining && (page < num_pages)) { + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, + data, page_off, + space_remaining); + + if (rc > 0) { + space_remaining -= rc; + data += rc; + page_off = 0; + page++; + } else if (rc == 0) { + space_remaining = 0; + } else if (ignore_missing && (page > 0)) { + int intended_size = SFP_PAGE_SIZE - page_off; + + space_remaining -= intended_size; + if (space_remaining < 0) { + space_remaining = 0; + } else { + memset(data, 0, intended_size); + data += intended_size; + page_off = 0; + page++; + rc = 0; + } + } else { + return rc; + } } return 0; @@ -807,16 +919,42 @@ static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, static int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo) { - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + int sff_8472_level; + int diag_type; - switch (phy_cfg->media) { + switch (efx_mcdi_phy_module_type(efx)) { case MC_CMD_MEDIA_SFP_PLUS: - modinfo->type = ETH_MODULE_SFF_8079; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - return 0; + sff_8472_level = efx_mcdi_phy_sff_8472_level(efx); + + /* If we can't read the diagnostics level we have none. */ + if (sff_8472_level < 0) + return -EOPNOTSUPP; + + /* Check if this module requires the (unsupported) address + * change operation. + */ + diag_type = efx_mcdi_phy_diag_type(efx); + + if ((sff_8472_level == 0) || + (diag_type & SFF_DIAG_ADDR_CHANGE)) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + + case MC_CMD_MEDIA_QSFP_PLUS: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + default: return -EOPNOTSUPP; } + + return 0; } static const struct efx_phy_operations efx_mcdi_phy_ops = { diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 8603e397097e..5b56c24b6ed2 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -248,7 +248,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], dev->ethtool_ops = &vsw_ethtool_ops; dev->watchdog_timeo = VSW_TX_TIMEOUT; - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG; + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; /* MTU range: 68 - 65535 */ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 4bb04aaf9650..6a4e8e1bbd90 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9221,8 +9221,7 @@ static int niu_get_of_props(struct niu *np) phy_type = of_get_property(dp, "phy-type", &prop_len); if (!phy_type) { - netdev_err(dev, "%s: OF node lacks phy-type property\n", - dp->full_name); + netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp); return -EINVAL; } @@ -9232,26 +9231,25 @@ static int niu_get_of_props(struct niu *np) strcpy(np->vpd.phy_type, phy_type); if (niu_phy_type_prop_decode(np, np->vpd.phy_type)) { - netdev_err(dev, "%s: Illegal phy string [%s]\n", - dp->full_name, np->vpd.phy_type); + netdev_err(dev, "%pOF: Illegal phy string [%s]\n", + dp, np->vpd.phy_type); return -EINVAL; } mac_addr = of_get_property(dp, "local-mac-address", &prop_len); if (!mac_addr) { - netdev_err(dev, "%s: OF node lacks local-mac-address property\n", - dp->full_name); + netdev_err(dev, "%pOF: OF node lacks local-mac-address property\n", + dp); return -EINVAL; } if (prop_len != dev->addr_len) { - netdev_err(dev, "%s: OF MAC address prop len (%d) is wrong\n", - dp->full_name, prop_len); + netdev_err(dev, "%pOF: OF MAC address prop len (%d) is wrong\n", + dp, prop_len); } memcpy(dev->dev_addr, mac_addr, dev->addr_len); if (!is_valid_ether_addr(&dev->dev_addr[0])) { - netdev_err(dev, "%s: OF MAC address is invalid\n", - dp->full_name); - netdev_err(dev, "%s: [ %pM ]\n", dp->full_name, dev->dev_addr); + netdev_err(dev, "%pOF: OF MAC address is invalid\n", dp); + netdev_err(dev, "%pOF: [ %pM ]\n", dp, dev->dev_addr); return -EINVAL; } @@ -10027,8 +10025,8 @@ static int niu_of_probe(struct platform_device *op) reg = of_get_property(op->dev.of_node, "reg", NULL); if (!reg) { - dev_err(&op->dev, "%s: No 'reg' property, aborting\n", - op->dev.of_node->full_name); + dev_err(&op->dev, "%pOF: No 'reg' property, aborting\n", + op->dev.of_node); return -ENODEV; } diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 75b167e3fe98..0b95105f7060 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac, dev->watchdog_timeo = VNET_TX_TIMEOUT; dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | - NETIF_F_IP_CSUM | NETIF_F_SG; + NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; /* MTU range: 68 - 65535 */ diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 9e86833249d4..ecf456c7b6d1 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -303,7 +303,7 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev, return skb; } -static inline void vnet_fullcsum(struct sk_buff *skb) +static inline void vnet_fullcsum_ipv4(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); int offset = skb_transport_offset(skb); @@ -335,6 +335,40 @@ static inline void vnet_fullcsum(struct sk_buff *skb) } } +#if IS_ENABLED(CONFIG_IPV6) +static inline void vnet_fullcsum_ipv6(struct sk_buff *skb) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int offset = skb_transport_offset(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + return; + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return; + skb->ip_summed = CHECKSUM_NONE; + skb->csum_level = 1; + skb->csum = 0; + if (ip6h->nexthdr == IPPROTO_TCP) { + struct tcphdr *ptcp = tcp_hdr(skb); + + ptcp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + skb->len - offset, IPPROTO_TCP, + skb->csum); + } else if (ip6h->nexthdr == IPPROTO_UDP) { + struct udphdr *pudp = udp_hdr(skb); + + pudp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + skb->len - offset, IPPROTO_UDP, + skb->csum); + } +} +#endif + static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); @@ -394,9 +428,14 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_reset_transport_header(skb); skb_set_transport_header(skb, ihl); - vnet_fullcsum(skb); + vnet_fullcsum_ipv4(skb); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + skb_set_transport_header(skb, + sizeof(struct ipv6hdr)); + vnet_fullcsum_ipv6(skb); +#endif } } if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { @@ -1115,24 +1154,47 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) if (skb->ip_summed == CHECKSUM_PARTIAL) start = skb_checksum_start_offset(skb); if (start) { - struct iphdr *iph = ip_hdr(nskb); int offset = start + nskb->csum_offset; + /* copy the headers, no csum here */ if (skb_copy_bits(skb, 0, nskb->data, start)) { dev_kfree_skb(nskb); dev_kfree_skb(skb); return NULL; } + + /* copy the rest, with csum calculation */ *(__sum16 *)(skb->data + offset) = 0; csum = skb_copy_and_csum_bits(skb, start, nskb->data + start, skb->len - start, 0); - if (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP) { - csum = csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - start, - iph->protocol, csum); + + /* add in the header checksums */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(nskb); + + if (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP) { + csum = csum_tcpudp_magic(iph->saddr, + iph->daddr, + skb->len - start, + iph->protocol, + csum); + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(nskb); + + if (ip6h->nexthdr == IPPROTO_TCP || + ip6h->nexthdr == IPPROTO_UDP) { + csum = csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb->len - start, + ip6h->nexthdr, + csum); + } } + + /* save the final result */ *(__sum16 *)(nskb->data + offset) = csum; nskb->ip_summed = CHECKSUM_NONE; @@ -1318,8 +1380,14 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, if (unlikely(!skb)) goto out_dropped; - if (skb->ip_summed == CHECKSUM_PARTIAL) - vnet_fullcsum(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->protocol == htons(ETH_P_IP)) + vnet_fullcsum_ipv4(skb); +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + vnet_fullcsum_ipv6(skb); +#endif + } dr = &port->vio.drings[VIO_DRIVER_TX_RING]; i = skb_get_queue_mapping(skb); diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 3b91257683bc..e1b55b8fb8e0 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -17,6 +17,7 @@ #include <linux/netdevice.h> #include <linux/tcp.h> +#include <linux/interrupt.h> #include "dwc-xlgmac.h" #include "dwc-xlgmac-reg.h" diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index badd0a8caeb9..c8776dbf1a55 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1321,8 +1321,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); if (!phy) { - dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", - slave->data->phy_node->full_name, + dev_err(priv->dev, "phy \"%pOF\" not found on slave %d\n", + slave->data->phy_node, slave->slave_num); return; } @@ -2670,8 +2670,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, parp = of_get_property(slave_node, "phy_id", &lenp); if (slave_data->phy_node) { dev_dbg(&pdev->dev, - "slave[%d] using phy-handle=\"%s\"\n", - i, slave_data->phy_node->full_name); + "slave[%d] using phy-handle=\"%pOF\"\n", + i, slave_data->phy_node); } else if (of_phy_is_fixed_link(slave_node)) { /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 64d5527feb2a..4bb561856af5 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1480,8 +1480,8 @@ static int emac_dev_open(struct net_device *ndev) phydev = of_phy_connect(ndev, priv->phy_node, &emac_adjust_link, 0, 0); if (!phydev) { - dev_err(emac_dev, "could not connect to phy %s\n", - priv->phy_node->full_name); + dev_err(emac_dev, "could not connect to phy %pOF\n", + priv->phy_node); ret = -ENODEV; goto err; } diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index d73da8afe08e..60abc9250f56 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1089,7 +1089,7 @@ static int temac_of_probe(struct platform_device *op) lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0); if (lp->phy_node) - dev_dbg(lp->dev, "using PHY node %s (%p)\n", np->full_name, np); + dev_dbg(lp->dev, "using PHY node %pOF (%p)\n", np, np); /* Add the device attributes */ rc = sysfs_create_group(&lp->dev->kobj, &temac_attr_group); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index af27f7d1cbf3..5ef626331f85 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -389,7 +389,7 @@ struct axidma_bd { * @dma_err_tasklet: Tasklet structure to process Axi DMA errors * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number - * @phy_type: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X + * @phy_mode: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X * @options: AxiEthernet option word * @last_link: Phy link state in which the PHY was negotiated earlier * @features: Stores the extended features supported by the axienet hw @@ -432,7 +432,7 @@ struct axienet_local { int tx_irq; int rx_irq; - u32 phy_type; + phy_interface_t phy_mode; u32 options; /* Current options word */ u32 last_link; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 33c595f4691d..e74e1e897864 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -531,11 +531,11 @@ static void axienet_adjust_link(struct net_device *ndev) link_state = phy->speed | (phy->duplex << 1) | phy->link; if (lp->last_link != link_state) { if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) { - if (lp->phy_type == XAE_PHY_TYPE_1000BASE_X) + if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) setspeed = 0; } else { if ((phy->speed == SPEED_1000) && - (lp->phy_type == XAE_PHY_TYPE_MII)) + (lp->phy_mode == PHY_INTERFACE_MODE_MII)) setspeed = 0; } @@ -935,15 +935,8 @@ static int axienet_open(struct net_device *ndev) return ret; if (lp->phy_node) { - if (lp->phy_type == XAE_PHY_TYPE_GMII) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); - } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, - PHY_INTERFACE_MODE_RGMII_ID); - } + phydev = of_phy_connect(lp->ndev, lp->phy_node, + axienet_adjust_link, 0, lp->phy_mode); if (!phydev) dev_err(lp->dev, "of_phy_connect() failed\n"); @@ -1539,7 +1532,38 @@ static int axienet_probe(struct platform_device *pdev) * the device-tree and accordingly set flags. */ of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem); - of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type); + + /* Start with the proprietary, and broken phy_type */ + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &value); + if (!ret) { + netdev_warn(ndev, "Please upgrade your device tree binary blob to use phy-mode"); + switch (value) { + case XAE_PHY_TYPE_MII: + lp->phy_mode = PHY_INTERFACE_MODE_MII; + break; + case XAE_PHY_TYPE_GMII: + lp->phy_mode = PHY_INTERFACE_MODE_GMII; + break; + case XAE_PHY_TYPE_RGMII_2_0: + lp->phy_mode = PHY_INTERFACE_MODE_RGMII_ID; + break; + case XAE_PHY_TYPE_SGMII: + lp->phy_mode = PHY_INTERFACE_MODE_SGMII; + break; + case XAE_PHY_TYPE_1000BASE_X: + lp->phy_mode = PHY_INTERFACE_MODE_1000BASEX; + break; + default: + ret = -EINVAL; + goto free_netdev; + } + } else { + lp->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (lp->phy_mode < 0) { + ret = -EINVAL; + goto free_netdev; + } + } /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index de8156c6b292..745d57ae95d7 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -715,6 +715,7 @@ free_dst: static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct net_device *dev, + struct geneve_sock *gs4, struct flowi4 *fl4, const struct ip_tunnel_info *info) { @@ -724,7 +725,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct rtable *rt = NULL; __u8 tos; - if (!rcu_dereference(geneve->sock4)) + if (!gs4) return ERR_PTR(-EIO); memset(fl4, 0, sizeof(*fl4)); @@ -764,6 +765,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct net_device *dev, + struct geneve_sock *gs6, struct flowi6 *fl6, const struct ip_tunnel_info *info) { @@ -771,10 +773,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct geneve_dev *geneve = netdev_priv(dev); struct dst_entry *dst = NULL; struct dst_cache *dst_cache; - struct geneve_sock *gs6; __u8 prio; - gs6 = rcu_dereference(geneve->sock6); if (!gs6) return ERR_PTR(-EIO); @@ -827,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 df; int err; - rt = geneve_get_v4_rt(skb, dev, &fl4, info); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -866,7 +866,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - dst = geneve_get_v6_dst(skb, dev, &fl6, info); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -951,8 +951,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct flowi4 fl4; + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); - rt = geneve_get_v4_rt(skb, dev, &fl4, info); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -962,8 +963,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct flowi6 fl6; + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); - dst = geneve_get_v6_dst(skb, dev, &fl6, info); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1014,16 +1016,22 @@ static struct device_type geneve_type = { * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ -static void geneve_push_rx_ports(struct net_device *dev) +static void geneve_offload_rx_ports(struct net_device *dev, bool push) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; rcu_read_lock(); - list_for_each_entry_rcu(gs, &gn->sock_list, list) - udp_tunnel_push_rx_port(dev, gs->sock, - UDP_TUNNEL_TYPE_GENEVE); + list_for_each_entry_rcu(gs, &gn->sock_list, list) { + if (push) { + udp_tunnel_push_rx_port(dev, gs->sock, + UDP_TUNNEL_TYPE_GENEVE); + } else { + udp_tunnel_drop_rx_port(dev, gs->sock, + UDP_TUNNEL_TYPE_GENEVE); + } + } rcu_read_unlock(); } @@ -1140,6 +1148,15 @@ static bool is_tnl_info_zero(const struct ip_tunnel_info *info) return true; } +static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, + struct ip_tunnel_info *b) +{ + if (ip_tunnel_info_af(a) == AF_INET) + return a->key.u.ipv4.dst == b->key.u.ipv4.dst; + else + return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); +} + static int geneve_configure(struct net *net, struct net_device *dev, const struct ip_tunnel_info *info, bool metadata, bool ipv6_rx_csum) @@ -1197,24 +1214,22 @@ static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) info->key.tp_dst = htons(dst_port); } -static int geneve_newlink(struct net *net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int geneve_nl2info(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], struct ip_tunnel_info *info, + bool *metadata, bool *use_udp6_rx_checksums, + bool changelink) { - bool use_udp6_rx_checksums = false; - struct ip_tunnel_info info; - bool metadata = false; - - init_tnl_info(&info, GENEVE_UDP_PORT); - if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) return -EINVAL; if (data[IFLA_GENEVE_REMOTE]) { - info.key.u.ipv4.dst = + if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) + return -EOPNOTSUPP; + + info->key.u.ipv4.dst = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); - if (IN_MULTICAST(ntohl(info.key.u.ipv4.dst))) { + if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) { netdev_dbg(dev, "multicast remote is unsupported\n"); return -EINVAL; } @@ -1222,21 +1237,24 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_REMOTE6]) { #if IS_ENABLED(CONFIG_IPV6) - info.mode = IP_TUNNEL_INFO_IPV6; - info.key.u.ipv6.dst = + if (changelink && (ip_tunnel_info_af(info) == AF_INET)) + return -EOPNOTSUPP; + + info->mode = IP_TUNNEL_INFO_IPV6; + info->key.u.ipv6.dst = nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); - if (ipv6_addr_type(&info.key.u.ipv6.dst) & + if (ipv6_addr_type(&info->key.u.ipv6.dst) & IPV6_ADDR_LINKLOCAL) { netdev_dbg(dev, "link-local remote is unsupported\n"); return -EINVAL; } - if (ipv6_addr_is_multicast(&info.key.u.ipv6.dst)) { + if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { netdev_dbg(dev, "multicast remote is unsupported\n"); return -EINVAL; } - info.key.tun_flags |= TUNNEL_CSUM; - use_udp6_rx_checksums = true; + info->key.tun_flags |= TUNNEL_CSUM; + *use_udp6_rx_checksums = true; #else return -EPFNOSUPPORT; #endif @@ -1245,48 +1263,169 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_ID]) { __u32 vni; __u8 tvni[3]; + __be64 tunid; vni = nla_get_u32(data[IFLA_GENEVE_ID]); tvni[0] = (vni & 0x00ff0000) >> 16; tvni[1] = (vni & 0x0000ff00) >> 8; tvni[2] = vni & 0x000000ff; - info.key.tun_id = vni_to_tunnel_id(tvni); + tunid = vni_to_tunnel_id(tvni); + if (changelink && (tunid != info->key.tun_id)) + return -EOPNOTSUPP; + info->key.tun_id = tunid; } + if (data[IFLA_GENEVE_TTL]) - info.key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); if (data[IFLA_GENEVE_TOS]) - info.key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); if (data[IFLA_GENEVE_LABEL]) { - info.key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & + info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & IPV6_FLOWLABEL_MASK; - if (info.key.label && (!(info.mode & IP_TUNNEL_INFO_IPV6))) + if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) return -EINVAL; } - if (data[IFLA_GENEVE_PORT]) - info.key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); + if (data[IFLA_GENEVE_PORT]) { + if (changelink) + return -EOPNOTSUPP; + info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); + } - if (data[IFLA_GENEVE_COLLECT_METADATA]) - metadata = true; + if (data[IFLA_GENEVE_COLLECT_METADATA]) { + if (changelink) + return -EOPNOTSUPP; + *metadata = true; + } - if (data[IFLA_GENEVE_UDP_CSUM] && - nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - info.key.tun_flags |= TUNNEL_CSUM; + if (data[IFLA_GENEVE_UDP_CSUM]) { + if (changelink) + return -EOPNOTSUPP; + if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) + info->key.tun_flags |= TUNNEL_CSUM; + } + + if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { + if (changelink) + return -EOPNOTSUPP; + if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) + info->key.tun_flags &= ~TUNNEL_CSUM; + } + + if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { + if (changelink) + return -EOPNOTSUPP; + if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) + *use_udp6_rx_checksums = false; + } + + return 0; +} - if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && - nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) - info.key.tun_flags &= ~TUNNEL_CSUM; +static int geneve_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + bool use_udp6_rx_checksums = false; + struct ip_tunnel_info info; + bool metadata = false; + int err; - if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] && - nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) - use_udp6_rx_checksums = false; + init_tnl_info(&info, GENEVE_UDP_PORT); + err = geneve_nl2info(dev, tb, data, &info, &metadata, + &use_udp6_rx_checksums, false); + if (err) + return err; return geneve_configure(net, dev, &info, metadata, use_udp6_rx_checksums); } +/* Quiesces the geneve device data path for both TX and RX. + * + * On transmit geneve checks for non-NULL geneve_sock before it proceeds. + * So, if we set that socket to NULL under RCU and wait for synchronize_net() + * to complete for the existing set of in-flight packets to be transmitted, + * then we would have quiesced the transmit data path. All the future packets + * will get dropped until we unquiesce the data path. + * + * On receive geneve dereference the geneve_sock stashed in the socket. So, + * if we set that to NULL under RCU and wait for synchronize_net() to + * complete, then we would have quiesced the receive data path. + */ +static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, + struct geneve_sock **gs6) +{ + *gs4 = rtnl_dereference(geneve->sock4); + rcu_assign_pointer(geneve->sock4, NULL); + if (*gs4) + rcu_assign_sk_user_data((*gs4)->sock->sk, NULL); +#if IS_ENABLED(CONFIG_IPV6) + *gs6 = rtnl_dereference(geneve->sock6); + rcu_assign_pointer(geneve->sock6, NULL); + if (*gs6) + rcu_assign_sk_user_data((*gs6)->sock->sk, NULL); +#else + *gs6 = NULL; +#endif + synchronize_net(); +} + +/* Resumes the geneve device data path for both TX and RX. */ +static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, + struct geneve_sock __maybe_unused *gs6) +{ + rcu_assign_pointer(geneve->sock4, gs4); + if (gs4) + rcu_assign_sk_user_data(gs4->sock->sk, gs4); +#if IS_ENABLED(CONFIG_IPV6) + rcu_assign_pointer(geneve->sock6, gs6); + if (gs6) + rcu_assign_sk_user_data(gs6->sock->sk, gs6); +#endif + synchronize_net(); +} + +static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs4, *gs6; + struct ip_tunnel_info info; + bool metadata; + bool use_udp6_rx_checksums; + int err; + + /* If the geneve device is configured for metadata (or externally + * controlled, for example, OVS), then nothing can be changed. + */ + if (geneve->collect_md) + return -EOPNOTSUPP; + + /* Start with the existing info. */ + memcpy(&info, &geneve->info, sizeof(info)); + metadata = geneve->collect_md; + use_udp6_rx_checksums = geneve->use_udp6_rx_checksums; + err = geneve_nl2info(dev, tb, data, &info, &metadata, + &use_udp6_rx_checksums, true); + if (err) + return err; + + if (!geneve_dst_addr_equal(&geneve->info, &info)) + dst_cache_reset(&info.dst_cache); + + geneve_quiesce(geneve, &gs4, &gs6); + geneve->info = info; + geneve->collect_md = metadata; + geneve->use_udp6_rx_checksums = use_udp6_rx_checksums; + geneve_unquiesce(geneve, gs4, gs6); + + return 0; +} + static void geneve_dellink(struct net_device *dev, struct list_head *head) { struct geneve_dev *geneve = netdev_priv(dev); @@ -1375,6 +1514,7 @@ static struct rtnl_link_ops geneve_link_ops __read_mostly = { .setup = geneve_setup, .validate = geneve_validate, .newlink = geneve_newlink, + .changelink = geneve_changelink, .dellink = geneve_dellink, .get_size = geneve_get_size, .fill_info = geneve_fill_info, @@ -1426,8 +1566,14 @@ static int geneve_netdevice_event(struct notifier_block *unused, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) - geneve_push_rx_ports(dev); + if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || + event == NETDEV_UDP_TUNNEL_DROP_INFO) { + geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); + } else if (event == NETDEV_UNREGISTER) { + geneve_offload_rx_ports(dev, false); + } else if (event == NETDEV_REGISTER) { + geneve_offload_rx_ports(dev, true); + } return NOTIFY_DONE; } diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index dec6b76bc0fb..cde41200f40a 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -581,7 +581,7 @@ static int __init setup_adapter(int card_base, int type, int n) priv->param.dma = -1; INIT_WORK(&priv->rx_work, rx_bh); dev->ml_priv = priv; - sprintf(dev->name, "dmascc%i", 2 * n + i); + snprintf(dev->name, sizeof(dev->name), "dmascc%i", 2 * n + i); dev->base_addr = card_base; dev->irq = irq; dev->netdev_ops = &scc_netdev_ops; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d6c25580f8dd..f2cef5aaed1f 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -147,7 +147,6 @@ struct hv_netvsc_packet { struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; int ring_size; - u32 max_num_vrss_chns; u32 num_chn; }; @@ -183,13 +182,16 @@ struct rndis_device { /* Interface */ struct rndis_message; struct netvsc_device; -int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *info); +struct net_device_context; + +struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *info); +int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); void netvsc_device_remove(struct hv_device *device); -int netvsc_send(struct hv_device *device, +int netvsc_send(struct net_device_context *ndc, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **page_buffer, + struct hv_page_buffer *page_buffer, struct sk_buff *skb); void netvsc_linkstatus_callback(struct hv_device *device_obj, struct rndis_message *resp); @@ -200,10 +202,11 @@ int netvsc_recv_callback(struct net_device *net, const struct ndis_pkt_8021q_info *vlan); void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); +bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); -int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *info); +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *info); void rndis_filter_update(struct netvsc_device *nvdev); void rndis_filter_device_remove(struct hv_device *dev, struct netvsc_device *nvdev); @@ -215,7 +218,8 @@ int rndis_filter_receive(struct net_device *ndev, struct vmbus_channel *channel, void *data, u32 buflen); -int rndis_filter_set_device_mac(struct net_device *ndev, char *mac); +int rndis_filter_set_device_mac(struct netvsc_device *ndev, + const char *mac); void netvsc_switch_datapath(struct net_device *nv_dev, bool vf); @@ -654,13 +658,10 @@ struct recv_comp_data { u32 status; }; -/* Netvsc Receive Slots Max */ -#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1) - struct multi_recv_comp { - void *buf; /* queued receive completions */ - u32 first; /* first data entry */ - u32 next; /* next entry for writing */ + struct recv_comp_data *slots; + u32 first; /* first data entry */ + u32 next; /* next entry for writing */ }; struct netvsc_stats { @@ -724,6 +725,7 @@ struct net_device_context { /* Per channel data */ struct netvsc_channel { struct vmbus_channel *channel; + struct netvsc_device *net_device; const struct vmpacket_descriptor *desc; struct napi_struct napi; struct multi_send_data msd; @@ -746,7 +748,7 @@ struct netvsc_device { u32 recv_buf_size; u32 recv_buf_gpadl_handle; u32 recv_section_cnt; - struct nvsp_1_receive_buffer_section *recv_section; + u32 recv_completion_cnt; /* Send buffer allocated by us */ void *send_buf; @@ -774,8 +776,6 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - atomic_t num_outstanding_recvs; - atomic_t open_cnt; struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; @@ -783,18 +783,6 @@ struct netvsc_device { struct rcu_head rcu; }; -static inline struct netvsc_device * -net_device_to_netvsc_device(struct net_device *ndev) -{ - return ((struct net_device_context *)netdev_priv(ndev))->nvdev; -} - -static inline struct netvsc_device * -hv_device_to_netvsc_device(struct hv_device *device) -{ - return net_device_to_netvsc_device(hv_get_drvdata(device)); -} - /* NdisInitialize message */ struct rndis_initialize_request { u32 req_id; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0a9167dd72fb..c64934c64dca 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -29,6 +29,9 @@ #include <linux/netdevice.h> #include <linux/if_ether.h> #include <linux/vmalloc.h> +#include <linux/rtnetlink.h> +#include <linux/prefetch.h> + #include <asm/sync_bitops.h> #include "hyperv_net.h" @@ -41,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct hv_device *dev = net_device_ctx->device_ctx; - struct netvsc_device *nv_dev = net_device_ctx->nvdev; + struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; memset(init_pkt, 0, sizeof(struct nvsp_message)); @@ -69,9 +72,6 @@ static struct netvsc_device *alloc_net_device(void) if (!net_device) return NULL; - net_device->chan_table[0].mrc.buf - = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); - init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); @@ -89,7 +89,7 @@ static void free_netvsc_device(struct rcu_head *head) int i; for (i = 0; i < VRSS_CHANNEL_MAX; i++) - vfree(nvdev->chan_table[i].mrc.buf); + vfree(nvdev->chan_table[i].mrc.slots); kfree(nvdev); } @@ -103,7 +103,8 @@ static void netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + struct net_device_context *ndc = netdev_priv(ndev); + struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); int ret; /* @@ -167,12 +168,6 @@ static void netvsc_destroy_buf(struct hv_device *device) net_device->recv_buf = NULL; } - if (net_device->recv_section) { - net_device->recv_section_cnt = 0; - kfree(net_device->recv_section); - net_device->recv_section = NULL; - } - /* Deal with the send buffer we may have setup. * If we got a send section size, it means we received a * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent @@ -235,11 +230,26 @@ static void netvsc_destroy_buf(struct hv_device *device) kfree(net_device->send_section_map); } +int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) +{ + struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; + int node = cpu_to_node(nvchan->channel->target_cpu); + size_t size; + + size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data); + nvchan->mrc.slots = vzalloc_node(size, node); + if (!nvchan->mrc.slots) + nvchan->mrc.slots = vzalloc(size); + + return nvchan->mrc.slots ? 0 : -ENOMEM; +} + static int netvsc_init_buf(struct hv_device *device, struct netvsc_device *net_device) { int ret = 0; struct nvsp_message *init_packet; + struct nvsp_1_message_send_receive_buffer_complete *resp; struct net_device *ndev; size_t map_words; int node; @@ -296,43 +306,41 @@ static int netvsc_init_buf(struct hv_device *device, wait_for_completion(&net_device->channel_init_wait); /* Check the response */ - if (init_packet->msg.v1_msg. - send_recv_buf_complete.status != NVSP_STAT_SUCCESS) { - netdev_err(ndev, "Unable to complete receive buffer " - "initialization with NetVsp - status %d\n", - init_packet->msg.v1_msg. - send_recv_buf_complete.status); + resp = &init_packet->msg.v1_msg.send_recv_buf_complete; + if (resp->status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, + "Unable to complete receive buffer initialization with NetVsp - status %d\n", + resp->status); ret = -EINVAL; goto cleanup; } /* Parse the response */ + netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n", + resp->num_sections, resp->sections[0].sub_alloc_size, + resp->sections[0].num_sub_allocs); - net_device->recv_section_cnt = init_packet->msg. - v1_msg.send_recv_buf_complete.num_sections; - - net_device->recv_section = kmemdup( - init_packet->msg.v1_msg.send_recv_buf_complete.sections, - net_device->recv_section_cnt * - sizeof(struct nvsp_1_receive_buffer_section), - GFP_KERNEL); - if (net_device->recv_section == NULL) { - ret = -EINVAL; - goto cleanup; - } + net_device->recv_section_cnt = resp->num_sections; /* * For 1st release, there should only be 1 section that represents the * entire receive buffer */ if (net_device->recv_section_cnt != 1 || - net_device->recv_section->offset != 0) { + resp->sections[0].offset != 0) { ret = -EINVAL; goto cleanup; } - /* Now setup the send buffer. - */ + /* Setup receive completion ring */ + net_device->recv_completion_cnt + = round_up(resp->sections[0].num_sub_allocs + 1, + PAGE_SIZE / sizeof(u64)); + ret = netvsc_alloc_recv_comp_ring(net_device, 0); + if (ret) + goto cleanup; + + /* Now setup the send buffer. */ net_device->send_buf = vzalloc_node(net_device->send_buf_size, node); if (!net_device->send_buf) net_device->send_buf = vzalloc(net_device->send_buf_size); @@ -549,7 +557,8 @@ void netvsc_device_remove(struct hv_device *device) { struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *net_device = net_device_ctx->nvdev; + struct netvsc_device *net_device + = rtnl_dereference(net_device_ctx->nvdev); int i; netvsc_disconnect_vsp(device); @@ -692,7 +701,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, u32 pend_size, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { char *start = net_device->send_buf; @@ -713,9 +722,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, } for (i = 0; i < page_count; i++) { - char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT); - u32 offset = (*pb)[i].offset; - u32 len = (*pb)[i].len; + char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT); + u32 offset = pb[i].offset; + u32 len = pb[i].len; memcpy(dest, (src + offset), len); msg_size += len; @@ -734,7 +743,7 @@ static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, struct netvsc_device *net_device, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { struct nvsp_message nvmsg; @@ -745,7 +754,6 @@ static inline int netvsc_send_pkt( struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; - struct hv_page_buffer *pgbuf; u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; @@ -771,11 +779,11 @@ static inline int netvsc_send_pkt( return -ENODEV; if (packet->page_buf_cnt) { - pgbuf = packet->cp_partial ? (*pb) + - packet->rmsg_pgcnt : (*pb); + if (packet->cp_partial) + pb += packet->rmsg_pgcnt; + ret = vmbus_sendpacket_pagebuffer_ctl(out_channel, - pgbuf, - packet->page_buf_cnt, + pb, packet->page_buf_cnt, &nvmsg, sizeof(struct nvsp_message), req_id, @@ -800,8 +808,10 @@ static inline int netvsc_send_pkt( ret = -ENOSPC; } } else { - netdev_err(ndev, "Unable to send packet %p ret %d\n", - packet, ret); + netdev_err(ndev, + "Unable to send packet pages %u len %u, ret %d\n", + packet->page_buf_cnt, packet->total_data_buflen, + ret); } return ret; @@ -819,13 +829,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send, msdp->count = 0; } -int netvsc_send(struct hv_device *device, +/* RCU already held by caller */ +int netvsc_send(struct net_device_context *ndev_ctx, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { - struct netvsc_device *net_device = hv_device_to_netvsc_device(device); + struct netvsc_device *net_device + = rcu_dereference_bh(ndev_ctx->nvdev); + struct hv_device *device = ndev_ctx->device_ctx; int ret = 0; struct netvsc_channel *nvchan; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@ -837,7 +850,7 @@ int netvsc_send(struct hv_device *device, bool xmit_more = (skb != NULL) ? skb->xmit_more : false; /* If device is rescinded, return error and packet will get dropped. */ - if (unlikely(net_device->destroy)) + if (unlikely(!net_device || net_device->destroy)) return -ENODEV; /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get @@ -942,130 +955,94 @@ send_now: return ret; } -static int netvsc_send_recv_completion(struct vmbus_channel *channel, - u64 transaction_id, u32 status) +/* Send pending recv completions */ +static int send_recv_completions(struct netvsc_channel *nvchan) { - struct nvsp_message recvcompMessage; + struct netvsc_device *nvdev = nvchan->net_device; + struct multi_recv_comp *mrc = &nvchan->mrc; + struct recv_comp_msg { + struct nvsp_message_header hdr; + u32 status; + } __packed; + struct recv_comp_msg msg = { + .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, + }; int ret; - recvcompMessage.hdr.msg_type = - NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE; - - recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status; - - /* Send the completion */ - ret = vmbus_sendpacket(channel, &recvcompMessage, - sizeof(struct nvsp_message_header) + sizeof(u32), - transaction_id, VM_PKT_COMP, 0); - - return ret; -} - -static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, - u32 *filled, u32 *avail) -{ - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 first = mrc->first; - u32 next = mrc->next; - - *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next : - next - first; - - *avail = NETVSC_RECVSLOT_MAX - *filled - 1; -} + while (mrc->first != mrc->next) { + const struct recv_comp_data *rcd + = mrc->slots + mrc->first; -/* Read the first filled slot, no change to index */ -static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device - *nvdev, u16 q_idx) -{ - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 filled, avail; + msg.status = rcd->status; + ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg), + rcd->tid, VM_PKT_COMP, 0); + if (unlikely(ret)) + return ret; - if (unlikely(!mrc->buf)) - return NULL; + if (++mrc->first == nvdev->recv_completion_cnt) + mrc->first = 0; + } - count_recv_comp_slot(nvdev, q_idx, &filled, &avail); - if (!filled) - return NULL; + /* receive completion ring has been emptied */ + if (unlikely(nvdev->destroy)) + wake_up(&nvdev->wait_drain); - return mrc->buf + mrc->first * sizeof(struct recv_comp_data); + return 0; } -/* Put the first filled slot back to available pool */ -static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) +/* Count how many receive completions are outstanding */ +static void recv_comp_slot_avail(const struct netvsc_device *nvdev, + const struct multi_recv_comp *mrc, + u32 *filled, u32 *avail) { - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - int num_recv; + u32 count = nvdev->recv_completion_cnt; - mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX; - - num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs); + if (mrc->next >= mrc->first) + *filled = mrc->next - mrc->first; + else + *filled = (count - mrc->first) + mrc->next; - if (nvdev->destroy && num_recv == 0) - wake_up(&nvdev->wait_drain); + *avail = count - *filled - 1; } -/* Check and send pending recv completions */ -static void netvsc_chk_recv_comp(struct netvsc_device *nvdev, - struct vmbus_channel *channel, u16 q_idx) +/* Add receive complete to ring to send to host. */ +static void enq_receive_complete(struct net_device *ndev, + struct netvsc_device *nvdev, u16 q_idx, + u64 tid, u32 status) { + struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx]; + struct multi_recv_comp *mrc = &nvchan->mrc; struct recv_comp_data *rcd; - int ret; - - while (true) { - rcd = read_recv_comp_slot(nvdev, q_idx); - if (!rcd) - break; + u32 filled, avail; - ret = netvsc_send_recv_completion(channel, rcd->tid, - rcd->status); - if (ret) - break; + recv_comp_slot_avail(nvdev, mrc, &filled, &avail); - put_recv_comp_slot(nvdev, q_idx); + if (unlikely(filled > NAPI_POLL_WEIGHT)) { + send_recv_completions(nvchan); + recv_comp_slot_avail(nvdev, mrc, &filled, &avail); } -} - -#define NETVSC_RCD_WATERMARK 80 - -/* Get next available slot */ -static inline struct recv_comp_data *get_recv_comp_slot( - struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx) -{ - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 filled, avail, next; - struct recv_comp_data *rcd; - - if (unlikely(!nvdev->recv_section)) - return NULL; - - if (unlikely(!mrc->buf)) - return NULL; - - if (atomic_read(&nvdev->num_outstanding_recvs) > - nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100) - netvsc_chk_recv_comp(nvdev, channel, q_idx); - - count_recv_comp_slot(nvdev, q_idx, &filled, &avail); - if (!avail) - return NULL; - next = mrc->next; - rcd = mrc->buf + next * sizeof(struct recv_comp_data); - mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX; + if (unlikely(!avail)) { + netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", + q_idx, tid); + return; + } - atomic_inc(&nvdev->num_outstanding_recvs); + rcd = mrc->slots + mrc->next; + rcd->tid = tid; + rcd->status = status; - return rcd; + if (++mrc->next == nvdev->recv_completion_cnt) + mrc->next = 0; } static int netvsc_receive(struct net_device *ndev, - struct netvsc_device *net_device, - struct net_device_context *net_device_ctx, - struct hv_device *device, - struct vmbus_channel *channel, - const struct vmpacket_descriptor *desc, - struct nvsp_message *nvsp) + struct netvsc_device *net_device, + struct net_device_context *net_device_ctx, + struct hv_device *device, + struct vmbus_channel *channel, + const struct vmpacket_descriptor *desc, + struct nvsp_message *nvsp) { const struct vmtransfer_page_packet_header *vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); @@ -1074,7 +1051,6 @@ static int netvsc_receive(struct net_device *ndev, u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; - int ret; /* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { @@ -1105,25 +1081,9 @@ static int netvsc_receive(struct net_device *ndev, channel, data, buflen); } - if (net_device->chan_table[q_idx].mrc.buf) { - struct recv_comp_data *rcd; + enq_receive_complete(ndev, net_device, q_idx, + vmxferpage_packet->d.trans_id, status); - rcd = get_recv_comp_slot(net_device, channel, q_idx); - if (rcd) { - rcd->tid = vmxferpage_packet->d.trans_id; - rcd->status = status; - } else { - netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", - q_idx, vmxferpage_packet->d.trans_id); - } - } else { - ret = netvsc_send_recv_completion(channel, - vmxferpage_packet->d.trans_id, - status); - if (ret) - netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n", - q_idx, vmxferpage_packet->d.trans_id, ret); - } return count; } @@ -1219,11 +1179,10 @@ int netvsc_poll(struct napi_struct *napi, int budget) { struct netvsc_channel *nvchan = container_of(napi, struct netvsc_channel, napi); + struct netvsc_device *net_device = nvchan->net_device; struct vmbus_channel *channel = nvchan->channel; struct hv_device *device = netvsc_channel_to_device(channel); - u16 q_idx = channel->offermsg.offer.sub_channel_index; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); int work_done = 0; /* If starting a new interval */ @@ -1236,17 +1195,23 @@ int netvsc_poll(struct napi_struct *napi, int budget) nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } - /* If receive ring was exhausted - * and not doing busy poll + /* if ring is empty, signal host */ + if (!nvchan->desc) + hv_pkt_iter_close(channel); + + /* If send of pending receive completions suceeded + * and did not exhaust NAPI budget this time + * and not doing busy poll * then re-enable host interrupts - * and reschedule if ring is not empty. + * and reschedule if ring is not empty. */ - if (work_done < budget && + if (send_recv_completions(nvchan) == 0 && + work_done < budget && napi_complete_done(napi, work_done) && - hv_end_read(&channel->inbound) != 0) + hv_end_read(&channel->inbound)) { + hv_begin_read(&channel->inbound); napi_reschedule(napi); - - netvsc_chk_recv_comp(net_device, channel, q_idx); + } /* Driver may overshoot since multiple packets per descriptor */ return min(work_done, budget); @@ -1258,10 +1223,15 @@ int netvsc_poll(struct napi_struct *napi, int budget) void netvsc_channel_cb(void *context) { struct netvsc_channel *nvchan = context; + struct vmbus_channel *channel = nvchan->channel; + struct hv_ring_buffer_info *rbi = &channel->inbound; + + /* preload first vmpacket descriptor */ + prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index); if (napi_schedule_prep(&nvchan->napi)) { /* disable interupts from host */ - hv_begin_read(&nvchan->channel->inbound); + hv_begin_read(rbi); __napi_schedule(&nvchan->napi); } @@ -1271,8 +1241,8 @@ void netvsc_channel_cb(void *context) * netvsc_device_add - Callback when the device belonging to this * driver is added */ -int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *device_info) +struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *device_info) { int i, ret = 0; int ring_size = device_info->ring_size; @@ -1282,7 +1252,7 @@ int netvsc_device_add(struct hv_device *device, net_device = alloc_net_device(); if (!net_device) - return -ENOMEM; + return ERR_PTR(-ENOMEM); net_device->ring_size = ring_size; @@ -1302,6 +1272,7 @@ int netvsc_device_add(struct hv_device *device, struct netvsc_channel *nvchan = &net_device->chan_table[i]; nvchan->channel = device->channel; + nvchan->net_device = net_device; } /* Enable NAPI handler before init callbacks */ @@ -1338,10 +1309,11 @@ int netvsc_device_add(struct hv_device *device, goto close; } - return ret; + return net_device; close: - netif_napi_del(&net_device->chan_table[0].napi); + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ vmbus_close(device->channel); @@ -1349,6 +1321,5 @@ close: cleanup: free_netvsc_device(&net_device->rcu); - return ret; - + return ERR_PTR(ret); } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0d78727f1a14..9453eef6d09f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -33,6 +33,8 @@ #include <linux/if_vlan.h> #include <linux/in.h> #include <linux/slab.h> +#include <linux/rtnetlink.h> + #include <net/arp.h> #include <net/route.h> #include <net/sock.h> @@ -69,7 +71,7 @@ static void netvsc_set_multicast_list(struct net_device *net) static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = ndev_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); struct rndis_device *rdev; int ret = 0; @@ -280,9 +282,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, - struct hv_page_buffer **page_buf) + struct hv_page_buffer *pb) { - struct hv_page_buffer *pb = *page_buf; u32 slots_used = 0; char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; @@ -359,7 +360,7 @@ static u32 net_checksum_info(struct sk_buff *skb) if (ip6->nexthdr == IPPROTO_TCP) return TRANSPORT_INFO_IPV6_TCP; - else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) + else if (ip6->nexthdr == IPPROTO_UDP) return TRANSPORT_INFO_IPV6_UDP; } @@ -377,8 +378,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 rndis_msg_size; struct rndis_per_packet_info *ppi; u32 hash; - struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number @@ -524,12 +524,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, - skb, packet, &pb); + skb, packet, pb); /* timestamp packet in software */ skb_tx_timestamp(skb); - ret = netvsc_send(net_device_ctx->device_ctx, packet, - rndis_msg, &pb, skb); + + ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb); if (likely(ret == 0)) return NETDEV_TX_OK; @@ -736,39 +736,16 @@ static void netvsc_get_channels(struct net_device *net, } } -static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, - u32 num_chn) -{ - struct netvsc_device_info device_info; - int ret; - - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = num_chn; - device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = num_chn; - - ret = rndis_filter_device_add(dev, &device_info); - if (ret) - return ret; - - ret = netif_set_real_num_tx_queues(net, num_chn); - if (ret) - return ret; - - ret = netif_set_real_num_rx_queues(net, num_chn); - - return ret; -} - static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); - unsigned int count = channels->combined_count; - bool was_running; - int ret; + unsigned int orig, count = channels->combined_count; + struct netvsc_device_info device_info; + bool was_opened; + int ret = 0; /* We do not support separate count for rx, tx, or other */ if (count == 0 || @@ -787,25 +764,32 @@ static int netvsc_set_channels(struct net_device *net, if (count > nvdev->max_chn) return -EINVAL; - was_running = netif_running(net); - if (was_running) { - ret = netvsc_close(net); - if (ret) - return ret; - } + orig = nvdev->num_chn; + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); rndis_filter_device_remove(dev, nvdev); - ret = netvsc_set_queues(net, dev, count); - if (ret == 0) - nvdev->num_chn = count; - else - netvsc_set_queues(net, dev, nvdev->num_chn); + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = count; + device_info.ring_size = ring_size; + + nvdev = rndis_filter_device_add(dev, &device_info); + if (!IS_ERR(nvdev)) { + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); + } else { + ret = PTR_ERR(nvdev); + device_info.num_chn = orig; + rndis_filter_device_add(dev, &device_info); + } - if (was_running) - ret = netvsc_open(net); + if (was_opened) + rndis_filter_open(nvdev); /* We may have missed link change notifications */ + net_device_ctx->last_reconfig = 0; schedule_delayed_work(&net_device_ctx->dwork, 0); return ret; @@ -871,39 +855,40 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); struct hv_device *hdev = ndevctx->device_ctx; + int orig_mtu = ndev->mtu; struct netvsc_device_info device_info; - bool was_running; + bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) return -ENODEV; - was_running = netif_running(ndev); - if (was_running) { - ret = netvsc_close(ndev); - if (ret) - return ret; - } + netif_device_detach(ndev); + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = nvdev->num_chn; - device_info.max_num_vrss_chns = nvdev->num_chn; rndis_filter_device_remove(hdev, nvdev); - /* 'nvdev' has been freed in rndis_filter_device_remove() -> - * netvsc_device_remove () -> free_netvsc_device(). - * We mustn't access it before it's re-created in - * rndis_filter_device_add() -> netvsc_device_add(). - */ - ndev->mtu = mtu; - rndis_filter_device_add(hdev, &device_info); + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + + /* Attempt rollback to original MTU */ + ndev->mtu = orig_mtu; + rndis_filter_device_add(hdev, &device_info); + } + + if (was_opened) + rndis_filter_open(nvdev); - if (was_running) - ret = netvsc_open(ndev); + netif_device_attach(ndev); /* We may have missed link change notifications */ schedule_delayed_work(&ndevctx->dwork, 0); @@ -959,6 +944,8 @@ static void netvsc_get_stats64(struct net_device *net, static int netvsc_set_mac_addr(struct net_device *ndev, void *p) { + struct net_device_context *ndc = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); struct sockaddr *addr = p; char save_adr[ETH_ALEN]; unsigned char save_aatype; @@ -971,7 +958,10 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) if (err != 0) return err; - err = rndis_filter_set_device_mac(ndev, addr->sa_data); + if (!nvdev) + return -ENODEV; + + err = rndis_filter_set_device_mac(nvdev, addr->sa_data); if (err != 0) { /* roll back to saved MAC */ memcpy(ndev->dev_addr, save_adr, ETH_ALEN); @@ -1017,7 +1007,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); const void *nds = &ndc->eth_stats; const struct netvsc_stats *qstats; unsigned int start; @@ -1055,7 +1045,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); u8 *p = data; int i; @@ -1113,7 +1103,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rules) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); if (!nvdev) return -ENODEV; @@ -1163,7 +1153,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *ndev = rcu_dereference(ndc->nvdev); + struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); struct rndis_device *rndis_dev; int i; @@ -1386,7 +1376,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) continue; /* not a netvsc device */ net_device_ctx = netdev_priv(dev); - if (net_device_ctx->nvdev == NULL) + if (!rtnl_dereference(net_device_ctx->nvdev)) continue; /* device is removed */ if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) @@ -1551,8 +1541,10 @@ static int netvsc_probe(struct hv_device *dev, memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; - ret = rndis_filter_device_add(dev, &device_info); - if (ret != 0) { + + nvdev = rndis_filter_device_add(dev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); free_netdev(net); hv_set_drvdata(dev, NULL); @@ -1566,11 +1558,11 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - /* RCU not necessary here, device not registered */ - nvdev = net_device_ctx->nvdev; netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); + netdev_lockdep_set_classes(net); + /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) @@ -1611,7 +1603,8 @@ static int netvsc_remove(struct hv_device *dev) * removed. Also blocks mtu and channel changes. */ rtnl_lock(); - rndis_filter_device_remove(dev, ndev_ctx->nvdev); + rndis_filter_device_remove(dev, + rtnl_dereference(ndev_ctx->nvdev)); rtnl_unlock(); unregister_netdev(net); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 85c00e1c52b6..44165fe328a4 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -28,6 +28,7 @@ #include <linux/if_vlan.h> #include <linux/nls.h> #include <linux/vmalloc.h> +#include <linux/rtnetlink.h> #include "hyperv_net.h" @@ -213,11 +214,11 @@ static void dump_rndis_message(struct hv_device *hv_dev, static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { - int ret; struct hv_netvsc_packet *packet; struct hv_page_buffer page_buf[2]; struct hv_page_buffer *pb = page_buf; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); + int ret; /* Setup the packet to send it */ packet = &req->pkt; @@ -243,7 +244,10 @@ static int rndis_filter_send_request(struct rndis_device *dev, pb[0].len; } - ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL); + rcu_read_lock_bh(); + ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL); + rcu_read_unlock_bh(); + return ret; } @@ -443,8 +447,9 @@ int rndis_filter_receive(struct net_device *ndev, return 0; } -static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, - void *result, u32 *result_size) +static int rndis_filter_query_device(struct rndis_device *dev, + struct netvsc_device *nvdev, + u32 oid, void *result, u32 *result_size) { struct rndis_request *request; u32 inresult_size = *result_size; @@ -471,8 +476,6 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, query->dev_vc_handle = 0; if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) { - struct net_device_context *ndevctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = ndevctx->nvdev; struct ndis_offload *hwcaps; u32 nvsp_version = nvdev->nvsp_version; u8 ndis_rev; @@ -541,14 +544,15 @@ cleanup: /* Get the hardware offload capabilities */ static int -rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps) +rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device, + struct ndis_offload *caps) { u32 caps_len = sizeof(*caps); int ret; memset(caps, 0, sizeof(*caps)); - ret = rndis_filter_query_device(dev, + ret = rndis_filter_query_device(dev, net_device, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, caps, &caps_len); if (ret) @@ -577,11 +581,12 @@ rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps) return 0; } -static int rndis_filter_query_device_mac(struct rndis_device *dev) +static int rndis_filter_query_device_mac(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = ETH_ALEN; - return rndis_filter_query_device(dev, + return rndis_filter_query_device(dev, net_device, RNDIS_OID_802_3_PERMANENT_ADDRESS, dev->hw_mac_adr, &size); } @@ -589,9 +594,9 @@ static int rndis_filter_query_device_mac(struct rndis_device *dev) #define NWADR_STR "NetworkAddress" #define NWADR_STRLEN 14 -int rndis_filter_set_device_mac(struct net_device *ndev, char *mac) +int rndis_filter_set_device_mac(struct netvsc_device *nvdev, + const char *mac) { - struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev); struct rndis_device *rdev = nvdev->extension; struct rndis_request *request; struct rndis_set_request *set; @@ -645,11 +650,8 @@ int rndis_filter_set_device_mac(struct net_device *ndev, char *mac) wait_for_completion(&request->wait_event); set_complete = &request->response_msg.msg.set_complete; - if (set_complete->status != RNDIS_STATUS_SUCCESS) { - netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", - set_complete->status); - ret = -EINVAL; - } + if (set_complete->status != RNDIS_STATUS_SUCCESS) + ret = -EIO; cleanup: put_rndis_request(rdev, request); @@ -658,9 +660,9 @@ cleanup: static int rndis_filter_set_offload_params(struct net_device *ndev, + struct netvsc_device *nvdev, struct ndis_offload_params *req_offloads) { - struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev); struct rndis_device *rdev = nvdev->extension; struct rndis_request *request; struct rndis_set_request *set; @@ -782,27 +784,27 @@ cleanup: return ret; } -static int rndis_filter_query_device_link_status(struct rndis_device *dev) +static int rndis_filter_query_device_link_status(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = sizeof(u32); u32 link_status; - int ret; - - ret = rndis_filter_query_device(dev, - RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, - &link_status, &size); - return ret; + return rndis_filter_query_device(dev, net_device, + RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, + &link_status, &size); } -static int rndis_filter_query_link_speed(struct rndis_device *dev) +static int rndis_filter_query_link_speed(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = sizeof(u32); u32 link_speed; struct net_device_context *ndc; int ret; - ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED, + ret = rndis_filter_query_device(dev, net_device, + RNDIS_OID_GEN_LINK_SPEED, &link_speed, &size); if (!ret) { @@ -871,14 +873,14 @@ void rndis_filter_update(struct netvsc_device *nvdev) schedule_work(&rdev->mcast_work); } -static int rndis_filter_init_device(struct rndis_device *dev) +static int rndis_filter_init_device(struct rndis_device *dev, + struct netvsc_device *nvdev) { struct rndis_request *request; struct rndis_initialize_request *init; struct rndis_initialize_complete *init_complete; u32 status; int ret; - struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev); request = get_rndis_request(dev, RNDIS_MSG_INIT, RNDIS_MESSAGE_SIZE(struct rndis_initialize_request)); @@ -926,12 +928,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev) { int i; - if (atomic_read(&nvdev->num_outstanding_recvs) > 0) - return false; - for (i = 0; i < nvdev->num_chn; i++) { const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; + if (nvchan->mrc.first != nvchan->mrc.next) + return false; + if (atomic_read(&nvchan->queue_sends) > 0) return false; } @@ -944,7 +946,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev) struct rndis_request *request; struct rndis_halt_request *halt; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); /* Attempt to do a rndis device halt */ request = get_rndis_request(dev, RNDIS_MSG_HALT, @@ -1015,20 +1017,20 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) { struct net_device *ndev = hv_get_drvdata(new_sc->primary_channel->device_obj); - struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev); + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct netvsc_device *nvscdev; u16 chn_index = new_sc->offermsg.offer.sub_channel_index; struct netvsc_channel *nvchan; int ret; - if (chn_index >= nvscdev->num_chn) + /* This is safe because this callback only happens when + * new device is being setup and waiting on the channel_init_wait. + */ + nvscdev = rcu_dereference_raw(ndev_ctx->nvdev); + if (!nvscdev || chn_index >= nvscdev->num_chn) return; nvchan = nvscdev->chan_table + chn_index; - nvchan->mrc.buf - = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); - - if (!nvchan->mrc.buf) - return; /* Because the device uses NAPI, all the interrupt batching and * control is done via Net softirq, not the channel handling @@ -1052,8 +1054,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) complete(&nvscdev->channel_init_wait); } -int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *device_info) +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(net); @@ -1072,21 +1074,20 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device = get_rndis_device(); if (!rndis_device) - return -ENODEV; + return ERR_PTR(-ENODEV); /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (RndisFilterOnReceive()) before this call is completed */ - ret = netvsc_device_add(dev, device_info); - if (ret != 0) { + net_device = netvsc_device_add(dev, device_info); + if (IS_ERR(net_device)) { kfree(rndis_device); - return ret; + return net_device; } /* Initialize the rndis device */ - net_device = net_device_ctx->nvdev; net_device->max_chn = 1; net_device->num_chn = 1; @@ -1096,35 +1097,29 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device->ndev = net; /* Send the rndis initialization message */ - ret = rndis_filter_init_device(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_filter_init_device(rndis_device, net_device); + if (ret != 0) + goto err_dev_remv; /* Get the MTU from the host */ size = sizeof(u32); - ret = rndis_filter_query_device(rndis_device, + ret = rndis_filter_query_device(rndis_device, net_device, RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, &mtu, &size); if (ret == 0 && size == sizeof(u32) && mtu < net->mtu) net->mtu = mtu; /* Get the mac address */ - ret = rndis_filter_query_device_mac(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_filter_query_device_mac(rndis_device, net_device); + if (ret != 0) + goto err_dev_remv; memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); /* Find HW offload capabilities */ - ret = rndis_query_hwcaps(rndis_device, &hwcaps); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps); + if (ret != 0) + goto err_dev_remv; /* A value of zero means "no change"; now turn on what we want. */ memset(&offloads, 0, sizeof(struct ndis_offload_params)); @@ -1179,24 +1174,24 @@ int rndis_filter_device_add(struct hv_device *dev, netif_set_gso_max_size(net, gso_max_size); - ret = rndis_filter_set_offload_params(net, &offloads); + ret = rndis_filter_set_offload_params(net, net_device, &offloads); if (ret) goto err_dev_remv; - rndis_filter_query_device_link_status(rndis_device); + rndis_filter_query_device_link_status(rndis_device, net_device); netdev_dbg(net, "Device MAC %pM link state %s\n", rndis_device->hw_mac_adr, rndis_device->link_state ? "down" : "up"); if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) - return 0; + return net_device; - rndis_filter_query_link_speed(rndis_device); + rndis_filter_query_link_speed(rndis_device, net_device); /* vRSS setup */ memset(&rsscap, 0, rsscap_size); - ret = rndis_filter_query_device(rndis_device, + ret = rndis_filter_query_device(rndis_device, net_device, OID_GEN_RECEIVE_SCALE_CAPABILITIES, &rsscap, &rsscap_size); if (ret || rsscap.num_recv_que < 2) @@ -1223,7 +1218,16 @@ int rndis_filter_device_add(struct hv_device *dev, num_rss_qs = net_device->num_chn - 1; if (num_rss_qs == 0) - return 0; + return net_device; + + for (i = 1; i < net_device->num_chn; i++) { + ret = netvsc_alloc_recv_comp_ring(net_device, i); + if (ret) { + while (--i != 0) + vfree(net_device->chan_table[i].mrc.slots); + goto out; + } + } refcount_set(&net_device->sc_offered, num_rss_qs); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); @@ -1260,11 +1264,11 @@ out: net_device->num_chn = 1; } - return 0; /* return 0 because primary channel can be used alone */ + return net_device; err_dev_remv: rndis_filter_device_remove(dev, net_device); - return ret; + return ERR_PTR(ret); } void rndis_filter_device_remove(struct hv_device *dev, @@ -1302,3 +1306,8 @@ int rndis_filter_close(struct netvsc_device *nvdev) return rndis_filter_close_device(nvdev->extension); } + +bool rndis_filter_opened(const struct netvsc_device *nvdev) +{ + return atomic_read(&nvdev->open_cnt) > 0; +} diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index a626c539fb17..326243fae7e2 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -66,6 +66,7 @@ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/workqueue.h> +#include <linux/interrupt.h> #include <net/ieee802154_netdev.h> #include <net/mac802154.h> diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f37e3c1fd4e7..fdde20735416 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -169,7 +169,7 @@ static void ipvlan_port_destroy(struct net_device *dev) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 22f133ea8d7b..5dea2063dbc8 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -24,7 +24,7 @@ #include <linux/virtio_net.h> #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) + NETIF_F_TSO6) static dev_t ipvtap_major; static struct cdev ipvtap_cdev; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0f581ee74fe4..ca35c6ba7947 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -841,7 +841,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 91e7b19bbf86..c2d0ea2fb019 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -49,7 +49,7 @@ static struct class macvtap_class = { static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) + NETIF_F_TSO6) static void macvtap_count_tx_dropped(struct tap_dev *tap) { diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 5d314f143aea..361fe9927ef2 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -55,43 +55,35 @@ #define MII_M1011_IMASK_INIT 0x6400 #define MII_M1011_IMASK_CLEAR 0x0000 -#define MII_M1011_PHY_SCR 0x10 -#define MII_M1011_PHY_SCR_MDI 0x0000 -#define MII_M1011_PHY_SCR_MDI_X 0x0020 -#define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060 - -#define MII_M1145_PHY_EXT_SR 0x1b -#define MII_M1145_PHY_EXT_CR 0x14 -#define MII_M1145_RGMII_RX_DELAY 0x0080 -#define MII_M1145_RGMII_TX_DELAY 0x0002 -#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4 -#define MII_M1145_HWCFG_MODE_MASK 0xf -#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000 - -#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4 -#define MII_M1145_HWCFG_MODE_MASK 0xf -#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000 +#define MII_M1011_PHY_SCR 0x10 +#define MII_M1011_PHY_SCR_DOWNSHIFT_EN BIT(11) +#define MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT 12 +#define MII_M1011_PHY_SRC_DOWNSHIFT_MASK 0x7800 +#define MII_M1011_PHY_SCR_MDI (0x0 << 5) +#define MII_M1011_PHY_SCR_MDI_X (0x1 << 5) +#define MII_M1011_PHY_SCR_AUTO_CROSS (0x3 << 5) #define MII_M1111_PHY_LED_CONTROL 0x18 #define MII_M1111_PHY_LED_DIRECT 0x4100 #define MII_M1111_PHY_LED_COMBINE 0x411c #define MII_M1111_PHY_EXT_CR 0x14 -#define MII_M1111_RX_DELAY 0x80 -#define MII_M1111_TX_DELAY 0x2 +#define MII_M1111_RGMII_RX_DELAY BIT(7) +#define MII_M1111_RGMII_TX_DELAY BIT(1) #define MII_M1111_PHY_EXT_SR 0x1b #define MII_M1111_HWCFG_MODE_MASK 0xf -#define MII_M1111_HWCFG_MODE_COPPER_RGMII 0xb #define MII_M1111_HWCFG_MODE_FIBER_RGMII 0x3 #define MII_M1111_HWCFG_MODE_SGMII_NO_CLK 0x4 +#define MII_M1111_HWCFG_MODE_RTBI 0x7 #define MII_M1111_HWCFG_MODE_COPPER_RTBI 0x9 -#define MII_M1111_HWCFG_FIBER_COPPER_AUTO 0x8000 -#define MII_M1111_HWCFG_FIBER_COPPER_RES 0x2000 +#define MII_M1111_HWCFG_MODE_COPPER_RGMII 0xb +#define MII_M1111_HWCFG_FIBER_COPPER_RES BIT(13) +#define MII_M1111_HWCFG_FIBER_COPPER_AUTO BIT(15) #define MII_88E1121_PHY_MSCR_REG 21 #define MII_88E1121_PHY_MSCR_RX_DELAY BIT(5) #define MII_88E1121_PHY_MSCR_TX_DELAY BIT(4) -#define MII_88E1121_PHY_MSCR_DELAY_MASK (~(0x3 << 4)) +#define MII_88E1121_PHY_MSCR_DELAY_MASK (~(BIT(5) || BIT(4))) #define MII_88E1121_MISC_TEST 0x1a #define MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK 0x1f00 @@ -108,24 +100,24 @@ #define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6) /* Copper Specific Interrupt Enable Register */ -#define MII_88E1318S_PHY_CSIER 0x12 +#define MII_88E1318S_PHY_CSIER 0x12 /* WOL Event Interrupt Enable */ -#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7) +#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7) /* LED Timer Control Register */ -#define MII_88E1318S_PHY_LED_TCR 0x12 -#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15) -#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7) -#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11) +#define MII_88E1318S_PHY_LED_TCR 0x12 +#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15) +#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7) +#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11) /* Magic Packet MAC address registers */ -#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17 -#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1 0x18 -#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0 0x19 +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17 +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1 0x18 +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0 0x19 -#define MII_88E1318S_PHY_WOL_CTRL 0x10 -#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12) -#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14) +#define MII_88E1318S_PHY_WOL_CTRL 0x10 +#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12) +#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14) #define MII_88E1121_PHY_LED_CTRL 16 #define MII_88E1121_PHY_LED_DEF 0x0030 @@ -138,8 +130,6 @@ #define MII_M1011_PHY_STATUS_RESOLVED 0x0800 #define MII_M1011_PHY_STATUS_LINK 0x0400 -#define MII_M1116R_CONTROL_REG_MAC 21 - #define MII_88E3016_PHY_SPEC_CTRL 0x10 #define MII_88E3016_DISABLE_SCRAMBLER 0x0200 #define MII_88E3016_AUTO_MDIX_CROSSOVER 0x0030 @@ -152,7 +142,7 @@ #define LPA_FIBER_1000HALF 0x40 #define LPA_FIBER_1000FULL 0x20 -#define LPA_PAUSE_FIBER 0x180 +#define LPA_PAUSE_FIBER 0x180 #define LPA_PAUSE_ASYM_FIBER 0x100 #define ADVERTISE_FIBER_1000HALF 0x40 @@ -274,6 +264,23 @@ static int marvell_set_polarity(struct phy_device *phydev, int polarity) return 0; } +static int marvell_set_downshift(struct phy_device *phydev, bool enable, + u8 retries) +{ + int reg; + + reg = phy_read(phydev, MII_M1011_PHY_SCR); + if (reg < 0) + return reg; + + reg &= MII_M1011_PHY_SRC_DOWNSHIFT_MASK; + reg |= ((retries - 1) << MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT); + if (enable) + reg |= MII_M1011_PHY_SCR_DOWNSHIFT_EN; + + return phy_write(phydev, MII_M1011_PHY_SCR, reg); +} + static int marvell_config_aneg(struct phy_device *phydev) { int err; @@ -292,17 +299,11 @@ static int marvell_config_aneg(struct phy_device *phydev) return err; if (phydev->autoneg != AUTONEG_ENABLE) { - int bmcr; - /* A write to speed/duplex bits (that is performed by * genphy_config_aneg() call above) must be followed by * a software reset. Otherwise, the write has no effect. */ - bmcr = phy_read(phydev, MII_BMCR); - if (bmcr < 0) - return bmcr; - - err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET); + err = genphy_soft_reset(phydev); if (err < 0) return err; } @@ -318,8 +319,7 @@ static int m88e1101_config_aneg(struct phy_device *phydev) * that certain registers get written in order * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); - + err = genphy_soft_reset(phydev); if (err < 0) return err; @@ -354,7 +354,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev) * that certain registers get written in order * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); + err = genphy_soft_reset(phydev); err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) @@ -370,17 +370,11 @@ static int m88e1111_config_aneg(struct phy_device *phydev) return err; if (phydev->autoneg != AUTONEG_ENABLE) { - int bmcr; - /* A write to speed/duplex bits (that is performed by * genphy_config_aneg() call above) must be followed by * a software reset. Otherwise, the write has no effect. */ - bmcr = phy_read(phydev, MII_BMCR); - if (bmcr < 0) - return bmcr; - - err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET); + err = genphy_soft_reset(phydev); if (err < 0) return err; } @@ -466,7 +460,7 @@ static int marvell_of_reg_init(struct phy_device *phydev) } #endif /* CONFIG_OF_MDIO */ -static int m88e1121_config_aneg(struct phy_device *phydev) +static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev) { int err, oldpage, mscr; @@ -474,31 +468,45 @@ static int m88e1121_config_aneg(struct phy_device *phydev) if (oldpage < 0) return oldpage; - if (phy_interface_is_rgmii(phydev)) { - mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG) & - MII_88E1121_PHY_MSCR_DELAY_MASK; - - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) - mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY | - MII_88E1121_PHY_MSCR_TX_DELAY); - else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) - mscr |= MII_88E1121_PHY_MSCR_RX_DELAY; - else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) - mscr |= MII_88E1121_PHY_MSCR_TX_DELAY; - - err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr); - if (err < 0) - return err; + mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG); + if (mscr < 0) { + err = mscr; + goto out; } + mscr &= MII_88E1121_PHY_MSCR_DELAY_MASK; + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY | + MII_88E1121_PHY_MSCR_TX_DELAY); + else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + mscr |= MII_88E1121_PHY_MSCR_RX_DELAY; + else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + mscr |= MII_88E1121_PHY_MSCR_TX_DELAY; + + err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr); + +out: marvell_set_page(phydev, oldpage); - err = phy_write(phydev, MII_BMCR, BMCR_RESET); + return err; +} + +static int m88e1121_config_aneg(struct phy_device *phydev) +{ + int err = 0; + + if (phy_interface_is_rgmii(phydev)) { + err = m88e1121_config_aneg_rgmii_delays(phydev); + if (err) + return err; + } + + err = genphy_soft_reset(phydev); if (err < 0) return err; - err = phy_write(phydev, MII_M1011_PHY_SCR, - MII_M1011_PHY_SCR_AUTO_CROSS); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; @@ -596,7 +604,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) if (changed == 0) { /* Advertisement hasn't changed, but maybe aneg was never on to - * begin with? Or maybe phy was isolated? + * begin with? Or maybe phy was isolated? */ int ctl = phy_read(phydev, MII_BMCR); @@ -653,12 +661,9 @@ static int marvell_config_init(struct phy_device *phydev) static int m88e1116r_config_init(struct phy_device *phydev) { - int temp; int err; - temp = phy_read(phydev, MII_BMCR); - temp |= BMCR_RESET; - err = phy_write(phydev, MII_BMCR, temp); + err = genphy_soft_reset(phydev); if (err < 0) return err; @@ -668,35 +673,22 @@ static int m88e1116r_config_init(struct phy_device *phydev) if (err < 0) return err; - temp = phy_read(phydev, MII_M1011_PHY_SCR); - temp |= (7 << 12); /* max number of gigabit attempts */ - temp |= (1 << 11); /* enable downshift */ - temp |= MII_M1011_PHY_SCR_AUTO_CROSS; - err = phy_write(phydev, MII_M1011_PHY_SCR, temp); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - err = marvell_set_page(phydev, MII_MARVELL_MSCR_PAGE); - if (err < 0) - return err; - temp = phy_read(phydev, MII_M1116R_CONTROL_REG_MAC); - temp |= (1 << 5); - temp |= (1 << 4); - err = phy_write(phydev, MII_M1116R_CONTROL_REG_MAC, temp); + err = marvell_set_downshift(phydev, true, 8); if (err < 0) return err; - err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); + + err = m88e1121_config_aneg_rgmii_delays(phydev); if (err < 0) return err; - temp = phy_read(phydev, MII_BMCR); - temp |= BMCR_RESET; - err = phy_write(phydev, MII_BMCR, temp); + err = genphy_soft_reset(phydev); if (err < 0) return err; - mdelay(500); - return marvell_config_init(phydev); } @@ -719,9 +711,29 @@ static int m88e3016_config_init(struct phy_device *phydev) return marvell_config_init(phydev); } -static int m88e1111_config_init_rgmii(struct phy_device *phydev) +static int m88e1111_config_init_hwcfg_mode(struct phy_device *phydev, + u16 mode, + int fibre_copper_auto) +{ + int temp; + + temp = phy_read(phydev, MII_M1111_PHY_EXT_SR); + if (temp < 0) + return temp; + + temp &= ~(MII_M1111_HWCFG_MODE_MASK | + MII_M1111_HWCFG_FIBER_COPPER_AUTO | + MII_M1111_HWCFG_FIBER_COPPER_RES); + temp |= mode; + + if (fibre_copper_auto) + temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO; + + return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp); +} + +static int m88e1111_config_init_rgmii_delays(struct phy_device *phydev) { - int err; int temp; temp = phy_read(phydev, MII_M1111_PHY_EXT_CR); @@ -729,16 +741,24 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev) return temp; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) { - temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY); + temp |= (MII_M1111_RGMII_RX_DELAY | MII_M1111_RGMII_TX_DELAY); } else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - temp &= ~MII_M1111_TX_DELAY; - temp |= MII_M1111_RX_DELAY; + temp &= ~MII_M1111_RGMII_TX_DELAY; + temp |= MII_M1111_RGMII_RX_DELAY; } else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - temp &= ~MII_M1111_RX_DELAY; - temp |= MII_M1111_TX_DELAY; + temp &= ~MII_M1111_RGMII_RX_DELAY; + temp |= MII_M1111_RGMII_TX_DELAY; } - err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp); + return phy_write(phydev, MII_M1111_PHY_EXT_CR, temp); +} + +static int m88e1111_config_init_rgmii(struct phy_device *phydev) +{ + int temp; + int err; + + err = m88e1111_config_init_rgmii_delays(phydev); if (err < 0) return err; @@ -759,17 +779,11 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev) static int m88e1111_config_init_sgmii(struct phy_device *phydev) { int err; - int temp; - - temp = phy_read(phydev, MII_M1111_PHY_EXT_SR); - if (temp < 0) - return temp; - - temp &= ~(MII_M1111_HWCFG_MODE_MASK); - temp |= MII_M1111_HWCFG_MODE_SGMII_NO_CLK; - temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO; - err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp); + err = m88e1111_config_init_hwcfg_mode( + phydev, + MII_M1111_HWCFG_MODE_SGMII_NO_CLK, + MII_M1111_HWCFG_FIBER_COPPER_AUTO); if (err < 0) return err; @@ -780,48 +794,27 @@ static int m88e1111_config_init_sgmii(struct phy_device *phydev) static int m88e1111_config_init_rtbi(struct phy_device *phydev) { int err; - int temp; - temp = phy_read(phydev, MII_M1111_PHY_EXT_CR); - if (temp < 0) - return temp; - - temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY); - err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp); - if (err < 0) + err = m88e1111_config_init_rgmii_delays(phydev); + if (err) return err; - temp = phy_read(phydev, MII_M1111_PHY_EXT_SR); - if (temp < 0) - return temp; - - temp &= ~(MII_M1111_HWCFG_MODE_MASK | - MII_M1111_HWCFG_FIBER_COPPER_RES); - temp |= 0x7 | MII_M1111_HWCFG_FIBER_COPPER_AUTO; - - err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp); + err = m88e1111_config_init_hwcfg_mode( + phydev, + MII_M1111_HWCFG_MODE_RTBI, + MII_M1111_HWCFG_FIBER_COPPER_AUTO); if (err < 0) return err; /* soft reset */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); + err = genphy_soft_reset(phydev); if (err < 0) return err; - do - temp = phy_read(phydev, MII_BMCR); - while (temp & BMCR_RESET); - - temp = phy_read(phydev, MII_M1111_PHY_EXT_SR); - if (temp < 0) - return temp; - - temp &= ~(MII_M1111_HWCFG_MODE_MASK | - MII_M1111_HWCFG_FIBER_COPPER_RES); - temp |= MII_M1111_HWCFG_MODE_COPPER_RTBI | - MII_M1111_HWCFG_FIBER_COPPER_AUTO; - - return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp); + return m88e1111_config_init_hwcfg_mode( + phydev, + MII_M1111_HWCFG_MODE_RTBI, + MII_M1111_HWCFG_FIBER_COPPER_AUTO); } static int m88e1111_config_init(struct phy_device *phydev) @@ -850,7 +843,7 @@ static int m88e1111_config_init(struct phy_device *phydev) if (err < 0) return err; - return phy_write(phydev, MII_BMCR, BMCR_RESET); + return genphy_soft_reset(phydev); } static int m88e1121_config_init(struct phy_device *phydev) @@ -912,12 +905,11 @@ static int m88e1118_config_aneg(struct phy_device *phydev) { int err; - err = phy_write(phydev, MII_BMCR, BMCR_RESET); + err = genphy_soft_reset(phydev); if (err < 0) return err; - err = phy_write(phydev, MII_M1011_PHY_SCR, - MII_M1011_PHY_SCR_AUTO_CROSS); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; @@ -961,7 +953,7 @@ static int m88e1118_config_init(struct phy_device *phydev) if (err < 0) return err; - return phy_write(phydev, MII_BMCR, BMCR_RESET); + return genphy_soft_reset(phydev); } static int m88e1149_config_init(struct phy_device *phydev) @@ -987,20 +979,15 @@ static int m88e1149_config_init(struct phy_device *phydev) if (err < 0) return err; - return phy_write(phydev, MII_BMCR, BMCR_RESET); + return genphy_soft_reset(phydev); } static int m88e1145_config_init_rgmii(struct phy_device *phydev) { + int temp; int err; - int temp = phy_read(phydev, MII_M1145_PHY_EXT_CR); - - if (temp < 0) - return temp; - temp |= (MII_M1145_RGMII_RX_DELAY | MII_M1145_RGMII_TX_DELAY); - - err = phy_write(phydev, MII_M1145_PHY_EXT_CR, temp); + err = m88e1111_config_init_rgmii_delays(phydev); if (err < 0) return err; @@ -1032,16 +1019,9 @@ static int m88e1145_config_init_rgmii(struct phy_device *phydev) static int m88e1145_config_init_sgmii(struct phy_device *phydev) { - int temp = phy_read(phydev, MII_M1145_PHY_EXT_SR); - - if (temp < 0) - return temp; - - temp &= ~MII_M1145_HWCFG_MODE_MASK; - temp |= MII_M1145_HWCFG_MODE_SGMII_NO_CLK; - temp |= MII_M1145_HWCFG_FIBER_COPPER_AUTO; - - return phy_write(phydev, MII_M1145_PHY_EXT_SR, temp); + return m88e1111_config_init_hwcfg_mode( + phydev, MII_M1111_HWCFG_MODE_SGMII_NO_CLK, + MII_M1111_HWCFG_FIBER_COPPER_AUTO); } static int m88e1145_config_init(struct phy_device *phydev) @@ -1515,7 +1495,7 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data) } #ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) +#define UINT64_MAX (u64)(~((u64)0)) #endif static u64 marvell_get_stat(struct phy_device *phydev, int i) { diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 34395230ce70..89425ca48412 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -21,6 +21,8 @@ #include <linux/of_platform.h> #include <linux/of_mdio.h> +#include <linux/platform_data/mdio-bcm-unimac.h> + #define MDIO_CMD 0x00 #define MDIO_START_BUSY (1 << 29) #define MDIO_READ_FAIL (1 << 28) @@ -41,6 +43,8 @@ struct unimac_mdio_priv { struct mii_bus *mii_bus; void __iomem *base; + int (*wait_func) (void *wait_func_data); + void *wait_func_data; }; static inline void unimac_mdio_start(struct unimac_mdio_priv *priv) @@ -57,10 +61,28 @@ static inline unsigned int unimac_mdio_busy(struct unimac_mdio_priv *priv) return __raw_readl(priv->base + MDIO_CMD) & MDIO_START_BUSY; } +static int unimac_mdio_poll(void *wait_func_data) +{ + struct unimac_mdio_priv *priv = wait_func_data; + unsigned int timeout = 1000; + + do { + if (!unimac_mdio_busy(priv)) + return 0; + + usleep_range(1000, 2000); + } while (timeout--); + + if (!timeout) + return -ETIMEDOUT; + + return 0; +} + static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg) { struct unimac_mdio_priv *priv = bus->priv; - unsigned int timeout = 1000; + int ret; u32 cmd; /* Prepare the read operation */ @@ -70,15 +92,9 @@ static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg) /* Start MDIO transaction */ unimac_mdio_start(priv); - do { - if (!unimac_mdio_busy(priv)) - break; - - usleep_range(1000, 2000); - } while (timeout--); - - if (!timeout) - return -ETIMEDOUT; + ret = priv->wait_func(priv->wait_func_data); + if (ret) + return ret; cmd = __raw_readl(priv->base + MDIO_CMD); @@ -97,7 +113,6 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) { struct unimac_mdio_priv *priv = bus->priv; - unsigned int timeout = 1000; u32 cmd; /* Prepare the write operation */ @@ -107,17 +122,7 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id, unimac_mdio_start(priv); - do { - if (!unimac_mdio_busy(priv)) - break; - - usleep_range(1000, 2000); - } while (timeout--); - - if (!timeout) - return -ETIMEDOUT; - - return 0; + return priv->wait_func(priv->wait_func_data); } /* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with @@ -155,8 +160,10 @@ static int unimac_mdio_reset(struct mii_bus *bus) } for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - if (read_mask & 1 << addr) + if (read_mask & 1 << addr) { + dev_dbg(&bus->dev, "Workaround for PHY @ %d\n", addr); mdiobus_read(bus, addr, MII_BMSR); + } } return 0; @@ -164,6 +171,7 @@ static int unimac_mdio_reset(struct mii_bus *bus) static int unimac_mdio_probe(struct platform_device *pdev) { + struct unimac_mdio_pdata *pdata = pdev->dev.platform_data; struct unimac_mdio_priv *priv; struct device_node *np; struct mii_bus *bus; @@ -193,12 +201,21 @@ static int unimac_mdio_probe(struct platform_device *pdev) bus = priv->mii_bus; bus->priv = priv; - bus->name = "unimac MII bus"; + if (pdata) { + bus->name = pdata->bus_name; + priv->wait_func = pdata->wait_func; + priv->wait_func_data = pdata->wait_func_data; + bus->phy_mask = ~pdata->phy_mask; + } else { + bus->name = "unimac MII bus"; + priv->wait_func_data = priv; + priv->wait_func = unimac_mdio_poll; + } bus->parent = &pdev->dev; bus->read = unimac_mdio_read; bus->write = unimac_mdio_write; bus->reset = unimac_mdio_reset; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); ret = of_mdiobus_register(bus, np); if (ret) { @@ -240,7 +257,7 @@ MODULE_DEVICE_TABLE(of, unimac_mdio_ids); static struct platform_driver unimac_mdio_driver = { .driver = { - .name = "unimac-mdio", + .name = UNIMAC_MDIO_DRV_NAME, .of_match_table = unimac_mdio_ids, }, .probe = unimac_mdio_probe, @@ -251,4 +268,4 @@ module_platform_driver(unimac_mdio_driver); MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Broadcom UniMAC MDIO bus controller"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:unimac-mdio"); +MODULE_ALIAS("platform:" UNIMAC_MDIO_DRV_NAME); diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c index 6a33646bdf05..c3825c7da038 100644 --- a/drivers/net/phy/mdio-mux-mmioreg.c +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -105,7 +105,7 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) const __be32 *iprop; int len, ret; - dev_dbg(&pdev->dev, "probing node %s\n", np->full_name); + dev_dbg(&pdev->dev, "probing node %pOF\n", np); s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL); if (!s) @@ -113,8 +113,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) ret = of_address_to_resource(np, 0, &res); if (ret) { - dev_err(&pdev->dev, "could not obtain memory map for node %s\n", - np->full_name); + dev_err(&pdev->dev, "could not obtain memory map for node %pOF\n", + np); return ret; } s->phys = res.start; @@ -145,15 +145,15 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) for_each_available_child_of_node(np, np2) { iprop = of_get_property(np2, "reg", &len); if (!iprop || len != sizeof(uint32_t)) { - dev_err(&pdev->dev, "mdio-mux child node %s is " - "missing a 'reg' property\n", np2->full_name); + dev_err(&pdev->dev, "mdio-mux child node %pOF is " + "missing a 'reg' property\n", np2); of_node_put(np2); return -ENODEV; } if (be32_to_cpup(iprop) & ~s->mask) { - dev_err(&pdev->dev, "mdio-mux child node %s has " + dev_err(&pdev->dev, "mdio-mux child node %pOF has " "a 'reg' value with unmasked bits\n", - np2->full_name); + np2); of_node_put(np2); return -ENODEV; } @@ -162,8 +162,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) ret = mdio_mux_init(&pdev->dev, mdio_mux_mmioreg_switch_fn, &s->mux_handle, s, NULL); if (ret) { - dev_err(&pdev->dev, "failed to register mdio-mux bus %s\n", - np->full_name); + dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n", + np); return ret; } diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index c608e1dfaf09..942ceaf3fd3f 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -138,16 +138,16 @@ int mdio_mux_init(struct device *dev, r = of_property_read_u32(child_bus_node, "reg", &v); if (r) { dev_err(dev, - "Error: Failed to find reg for child %s\n", - of_node_full_name(child_bus_node)); + "Error: Failed to find reg for child %pOF\n", + child_bus_node); continue; } cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL); if (cb == NULL) { dev_err(dev, - "Error: Failed to allocate memory for child %s\n", - of_node_full_name(child_bus_node)); + "Error: Failed to allocate memory for child %pOF\n", + child_bus_node); ret_val = -ENOMEM; continue; } @@ -157,8 +157,8 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { dev_err(dev, - "Error: Failed to allocate MDIO bus for child %s\n", - of_node_full_name(child_bus_node)); + "Error: Failed to allocate MDIO bus for child %pOF\n", + child_bus_node); ret_val = -ENOMEM; devm_kfree(dev, cb); continue; @@ -174,8 +174,8 @@ int mdio_mux_init(struct device *dev, r = of_mdiobus_register(cb->mii_bus, child_bus_node); if (r) { dev_err(dev, - "Error: Failed to register MDIO bus for child %s\n", - of_node_full_name(child_bus_node)); + "Error: Failed to register MDIO bus for child %pOF\n", + child_bus_node); mdiobus_free(cb->mii_bus); devm_kfree(dev, cb); } else { diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2df7b62c1a36..b6f9fa670168 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -399,8 +399,7 @@ error: } /* Put PHYs in RESET to save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); + gpiod_set_value_cansleep(bus->reset_gpiod, 1); device_del(&bus->dev); return err; @@ -425,8 +424,7 @@ void mdiobus_unregister(struct mii_bus *bus) } /* Put PHYs in RESET to save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); + gpiod_set_value_cansleep(bus->reset_gpiod, 1); device_del(&bus->dev); } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 5068c582d502..b9d4922581de 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -30,7 +30,6 @@ #include <linux/ethtool.h> #include <linux/phy.h> #include <linux/phy_led_triggers.h> -#include <linux/timer.h> #include <linux/workqueue.h> #include <linux/mdio.h> #include <linux/io.h> @@ -705,8 +704,8 @@ EXPORT_SYMBOL(phy_start_aneg); * * Description: The PHY infrastructure can run a state machine * which tracks whether the PHY is starting up, negotiating, - * etc. This function starts the timer which tracks the state - * of the PHY. If you want to maintain your own state machine, + * etc. This function starts the delayed workqueue which tracks + * the state of the PHY. If you want to maintain your own state machine, * do not call this function. */ void phy_start_machine(struct phy_device *phydev) @@ -737,9 +736,9 @@ void phy_trigger_machine(struct phy_device *phydev, bool sync) * phy_stop_machine - stop the PHY state machine tracking * @phydev: target phy_device struct * - * Description: Stops the state machine timer, sets the state to UP - * (unless it wasn't up yet). This function must be called BEFORE - * phy_detach. + * Description: Stops the state machine delayed workqueue, sets the + * state to UP (unless it wasn't up yet). This function must be + * called BEFORE phy_detach. */ void phy_stop_machine(struct phy_device *phydev) { @@ -1229,9 +1228,10 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - phydev_dbg(phydev, "PHY state change %s -> %s\n", - phy_state_to_str(old_state), - phy_state_to_str(phydev->state)); + if (old_state != phydev->state) + phydev_dbg(phydev, "PHY state change %s -> %s\n", + phy_state_to_str(old_state), + phy_state_to_str(phydev->state)); /* Only re-schedule a PHY state machine change if we are polling the * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 3570c7576993..ca267fd28ab8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -943,9 +943,6 @@ static int set_offload(struct tap_queue *q, unsigned long arg) if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } - - if (arg & TUN_F_UFO) - feature_mask |= NETIF_F_UFO; } /* tun/tap driver inverts the usage for TSO offloads, where @@ -956,7 +953,7 @@ static int set_offload(struct tap_queue *q, unsigned long arg) * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ - if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; @@ -1078,7 +1075,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO)) + TUN_F_TSO_ECN)) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 32ad87345f57..68add55f8460 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -199,7 +199,7 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6|NETIF_F_UFO) + NETIF_F_TSO6) int align; int vnet_hdr_sz; @@ -1921,11 +1921,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } - - if (arg & TUN_F_UFO) { - features |= NETIF_F_UFO; - arg &= ~TUN_F_UFO; - } } /* This gives the user a way to test for new features in future by diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8f572b9f3625..811b18215cae 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -367,7 +367,7 @@ static struct attribute *cdc_ncm_sysfs_attrs[] = { NULL, }; -static struct attribute_group cdc_ncm_sysfs_attr_group = { +static const struct attribute_group cdc_ncm_sysfs_attr_group = { .name = "cdc_ncm", .attrs = cdc_ncm_sysfs_attrs, }; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 98f17b05c68b..e90de2186ffc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -57,6 +57,11 @@ DECLARE_EWMA(pkt_len, 0, 64) #define VIRTNET_DRIVER_VERSION "1.0.0" +const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO }; + struct virtnet_stats { struct u64_stats_sync tx_syncp; struct u64_stats_sync rx_syncp; @@ -164,10 +169,13 @@ struct virtnet_info { u8 ctrl_promisc; u8 ctrl_allmulti; u16 ctrl_vid; + u64 ctrl_offloads; /* Ethtool settings */ u8 duplex; u32 speed; + + unsigned long guest_offloads; }; struct padded_vnet_hdr { @@ -270,6 +278,23 @@ static void skb_xmit_done(struct virtqueue *vq) netif_wake_subqueue(vi->dev, vq2txq(vq)); } +#define MRG_CTX_HEADER_SHIFT 22 +static void *mergeable_len_to_ctx(unsigned int truesize, + unsigned int headroom) +{ + return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); +} + +static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) +{ + return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; +} + +static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) +{ + return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); +} + /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, @@ -390,19 +415,85 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; } +/* We copy the packet for XDP in the following cases: + * + * 1) Packet is scattered across multiple rx buffers. + * 2) Headroom space is insufficient. + * + * This is inefficient but it's a temporary condition that + * we hit right after XDP is enabled and until queue is refilled + * with large buffers with sufficient headroom - so it should affect + * at most queue size packets. + * Afterwards, the conditions to enable + * XDP should preclude the underlying device from sending packets + * across multiple buffers (num_buf > 1), and we make sure buffers + * have enough headroom. + */ +static struct page *xdp_linearize_page(struct receive_queue *rq, + u16 *num_buf, + struct page *p, + int offset, + int page_off, + unsigned int *len) +{ + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + return NULL; + + memcpy(page_address(page) + page_off, page_address(p) + offset, *len); + page_off += *len; + + while (--*num_buf) { + unsigned int buflen; + void *buf; + int off; + + buf = virtqueue_get_buf(rq->vq, &buflen); + if (unlikely(!buf)) + goto err_buf; + + p = virt_to_head_page(buf); + off = buf - page_address(p); + + /* guard against a misconfigured or uncooperative backend that + * is sending packet larger than the MTU. + */ + if ((page_off + buflen) > PAGE_SIZE) { + put_page(p); + goto err_buf; + } + + memcpy(page_address(page) + page_off, + page_address(p) + off, buflen); + page_off += buflen; + put_page(p); + } + + /* Headroom does not contribute to packet length */ + *len = page_off - VIRTIO_XDP_HEADROOM; + return page; +err_buf: + __free_pages(page, 0); + return NULL; +} + static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len) + void *buf, void *ctx, + unsigned int len) { struct sk_buff *skb; struct bpf_prog *xdp_prog; - unsigned int xdp_headroom = virtnet_get_headroom(vi); + unsigned int xdp_headroom = (unsigned long)ctx; unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; unsigned int headroom = vi->hdr_len + header_offset; unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct page *page = virt_to_head_page(buf); unsigned int delta = 0; + struct page *xdp_page; len -= vi->hdr_len; rcu_read_lock(); @@ -416,6 +507,27 @@ static struct sk_buff *receive_small(struct net_device *dev, if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags)) goto err_xdp; + if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { + int offset = buf - page_address(page) + header_offset; + unsigned int tlen = len + vi->hdr_len; + u16 num_buf = 1; + + xdp_headroom = virtnet_get_headroom(vi); + header_offset = VIRTNET_RX_PAD + xdp_headroom; + headroom = vi->hdr_len + header_offset; + buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + xdp_page = xdp_linearize_page(rq, &num_buf, page, + offset, header_offset, + &tlen); + if (!xdp_page) + goto err_xdp; + + buf = page_address(xdp_page); + put_page(page); + page = xdp_page; + } + xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; xdp.data_end = xdp.data + len; @@ -444,7 +556,7 @@ static struct sk_buff *receive_small(struct net_device *dev, skb = build_skb(buf, buflen); if (!skb) { - put_page(virt_to_head_page(buf)); + put_page(page); goto err; } skb_reserve(skb, headroom - delta); @@ -460,7 +572,7 @@ err: err_xdp: rcu_read_unlock(); dev->stats.rx_dropped++; - put_page(virt_to_head_page(buf)); + put_page(page); xdp_xmit: return NULL; } @@ -485,66 +597,6 @@ err: return NULL; } -/* The conditions to enable XDP should preclude the underlying device from - * sending packets across multiple buffers (num_buf > 1). However per spec - * it does not appear to be illegal to do so but rather just against convention. - * So in order to avoid making a system unresponsive the packets are pushed - * into a page and the XDP program is run. This will be extremely slow and we - * push a warning to the user to fix this as soon as possible. Fixing this may - * require resolving the underlying hardware to determine why multiple buffers - * are being received or simply loading the XDP program in the ingress stack - * after the skb is built because there is no advantage to running it here - * anymore. - */ -static struct page *xdp_linearize_page(struct receive_queue *rq, - u16 *num_buf, - struct page *p, - int offset, - unsigned int *len) -{ - struct page *page = alloc_page(GFP_ATOMIC); - unsigned int page_off = VIRTIO_XDP_HEADROOM; - - if (!page) - return NULL; - - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); - page_off += *len; - - while (--*num_buf) { - unsigned int buflen; - void *buf; - int off; - - buf = virtqueue_get_buf(rq->vq, &buflen); - if (unlikely(!buf)) - goto err_buf; - - p = virt_to_head_page(buf); - off = buf - page_address(p); - - /* guard against a misconfigured or uncooperative backend that - * is sending packet larger than the MTU. - */ - if ((page_off + buflen) > PAGE_SIZE) { - put_page(p); - goto err_buf; - } - - memcpy(page_address(page) + page_off, - page_address(p) + off, buflen); - page_off += buflen; - put_page(p); - } - - /* Headroom does not contribute to packet length */ - *len = page_off - VIRTIO_XDP_HEADROOM; - return page; -err_buf: - __free_pages(page, 0); - return NULL; -} - static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -559,6 +611,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct sk_buff *head_skb, *curr_skb; struct bpf_prog *xdp_prog; unsigned int truesize; + unsigned int headroom = mergeable_ctx_to_headroom(ctx); head_skb = NULL; @@ -571,10 +624,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u32 act; /* This happens when rx buffer size is underestimated */ - if (unlikely(num_buf > 1)) { + if (unlikely(num_buf > 1 || + headroom < virtnet_get_headroom(vi))) { /* linearize data for XDP */ xdp_page = xdp_linearize_page(rq, &num_buf, - page, offset, &len); + page, offset, + VIRTIO_XDP_HEADROOM, + &len); if (!xdp_page) goto err_xdp; offset = VIRTIO_XDP_HEADROOM; @@ -639,13 +695,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } rcu_read_unlock(); - if (unlikely(len > (unsigned long)ctx)) { + truesize = mergeable_ctx_to_truesize(ctx); + if (unlikely(len > truesize)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)ctx); dev->stats.rx_length_errors++; goto err_skb; } - truesize = (unsigned long)ctx; + head_skb = page_to_skb(vi, rq, page, offset, len, truesize); curr_skb = head_skb; @@ -665,13 +722,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } page = virt_to_head_page(buf); - if (unlikely(len > (unsigned long)ctx)) { + + truesize = mergeable_ctx_to_truesize(ctx); + if (unlikely(len > truesize)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)ctx); dev->stats.rx_length_errors++; goto err_skb; } - truesize = (unsigned long)ctx; num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { @@ -754,7 +812,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len); else - skb = receive_small(dev, vi, rq, buf, len); + skb = receive_small(dev, vi, rq, buf, ctx, len); if (unlikely(!skb)) return 0; @@ -787,12 +845,18 @@ frame_err: return 0; } +/* Unlike mergeable buffers, all buffers are allocated to the + * same size, except for the headroom. For this reason we do + * not need to use mergeable_len_to_ctx here - it is enough + * to store the headroom as the context ignoring the truesize. + */ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); + void *ctx = (void *)(unsigned long)xdp_headroom; int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; int err; @@ -806,10 +870,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, alloc_frag->offset += len; sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom, vi->hdr_len + GOOD_PACKET_LEN); - err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp); + err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); - return err; } @@ -902,7 +965,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, } sg_init_one(rq->sg, buf, len); - ctx = (void *)(unsigned long)len; + ctx = mergeable_len_to_ctx(len, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); @@ -1014,7 +1077,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget) void *buf; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); - if (vi->mergeable_rx_bufs) { + if (!vi->big_packets || vi->mergeable_rx_bufs) { void *ctx; while (received < budget && @@ -1813,7 +1876,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev) } static int init_vqs(struct virtnet_info *vi); -static void _remove_vq_common(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { @@ -1842,37 +1904,45 @@ static int virtnet_restore_up(struct virtio_device *vdev) return err; } -static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp) +static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) { - struct virtio_device *dev = vi->vdev; - int ret; + struct scatterlist sg; + vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads); - virtio_config_disable(dev); - dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - virtnet_freeze_down(dev); - _remove_vq_common(vi); + sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads)); - virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, + VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { + dev_warn(&vi->dev->dev, "Fail to set guest offload. \n"); + return -EINVAL; + } - ret = virtio_finalize_features(dev); - if (ret) - goto err; + return 0; +} - vi->xdp_queue_pairs = xdp_qp; - ret = virtnet_restore_up(dev); - if (ret) - goto err; - ret = _virtnet_set_queues(vi, curr_qp); - if (ret) - goto err; +static int virtnet_clear_guest_offloads(struct virtnet_info *vi) +{ + u64 offloads = 0; - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); - virtio_config_enable(dev); - return 0; -err: - virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); - return ret; + if (!vi->guest_offloads) + return 0; + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) + offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM; + + return virtnet_set_guest_offloads(vi, offloads); +} + +static int virtnet_restore_guest_offloads(struct virtnet_info *vi) +{ + u64 offloads = vi->guest_offloads; + + if (!vi->guest_offloads) + return 0; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) + offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM; + + return virtnet_set_guest_offloads(vi, offloads); } static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, @@ -1884,10 +1954,11 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, u16 xdp_qp = 0, curr_qp; int i, err; - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || - virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) { + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) + && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) { NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); return -EOPNOTSUPP; } @@ -1921,35 +1992,35 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return PTR_ERR(prog); } - /* Changing the headroom in buffers is a disruptive operation because - * existing buffers must be flushed and reallocated. This will happen - * when a xdp program is initially added or xdp is disabled by removing - * the xdp program resulting in number of XDP queues changing. - */ - if (vi->xdp_queue_pairs != xdp_qp) { - err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp); - if (err) { - dev_warn(&dev->dev, "XDP reset failure.\n"); - goto virtio_reset_err; - } - } + /* Make sure NAPI is not using any XDP TX queues for RX. */ + for (i = 0; i < vi->max_queue_pairs; i++) + napi_disable(&vi->rq[i].napi); netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); + if (err) + goto err; + vi->xdp_queue_pairs = xdp_qp; for (i = 0; i < vi->max_queue_pairs; i++) { old_prog = rtnl_dereference(vi->rq[i].xdp_prog); rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) { + if (!old_prog) + virtnet_clear_guest_offloads(vi); + if (!prog) + virtnet_restore_guest_offloads(vi); + } if (old_prog) bpf_prog_put(old_prog); + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); } return 0; -virtio_reset_err: - /* On reset error do our best to unwind XDP changes inflight and return - * error up to user space for resolution. The underlying reset hung on - * us so not much we can do here. - */ +err: + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; @@ -2182,7 +2253,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); if (!names) goto err_names; - if (vi->mergeable_rx_bufs) { + if (!vi->big_packets || vi->mergeable_rx_bufs) { ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL); if (!ctx) goto err_ctx; @@ -2428,7 +2499,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { - dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO + dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ @@ -2438,13 +2509,11 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; - if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO)) - dev->hw_features |= NETIF_F_UFO; dev->features |= NETIF_F_GSO_ROBUST; if (gso) - dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO); + dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) @@ -2577,6 +2646,10 @@ static int virtnet_probe(struct virtio_device *vdev) netif_carrier_on(dev); } + for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) + if (virtio_has_feature(vi->vdev, guest_offloads[i])) + set_bit(guest_offloads[i], &vi->guest_offloads); + pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", dev->name, max_queue_pairs); @@ -2597,15 +2670,6 @@ free: return err; } -static void _remove_vq_common(struct virtnet_info *vi) -{ - vi->vdev->config->reset(vi->vdev); - free_unused_bufs(vi); - _free_receive_bufs(vi); - free_receive_page_frags(vi); - virtnet_del_vqs(vi); -} - static void remove_vq_common(struct virtnet_info *vi) { vi->vdev->config->reset(vi->vdev); @@ -2637,8 +2701,7 @@ static void virtnet_remove(struct virtio_device *vdev) free_netdev(vi->dev); } -#ifdef CONFIG_PM_SLEEP -static int virtnet_freeze(struct virtio_device *vdev) +static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; @@ -2649,7 +2712,7 @@ static int virtnet_freeze(struct virtio_device *vdev) return 0; } -static int virtnet_restore(struct virtio_device *vdev) +static __maybe_unused int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; int err; @@ -2665,7 +2728,6 @@ static int virtnet_restore(struct virtio_device *vdev) return 0; } -#endif static struct virtio_device_id id_table[] = { { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, @@ -2682,7 +2744,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ - VIRTIO_NET_F_MTU + VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 96aa7e6cf214..dbca067540d0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2608,7 +2608,7 @@ static struct device_type vxlan_type = { * supply the listening VXLAN udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ -static void vxlan_push_rx_ports(struct net_device *dev) +static void vxlan_offload_rx_ports(struct net_device *dev, bool push) { struct vxlan_sock *vs; struct net *net = dev_net(dev); @@ -2617,11 +2617,19 @@ static void vxlan_push_rx_ports(struct net_device *dev) spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { - hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) - udp_tunnel_push_rx_port(dev, vs->sock, - (vs->flags & VXLAN_F_GPE) ? - UDP_TUNNEL_TYPE_VXLAN_GPE : - UDP_TUNNEL_TYPE_VXLAN); + hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { + unsigned short type; + + if (vs->flags & VXLAN_F_GPE) + type = UDP_TUNNEL_TYPE_VXLAN_GPE; + else + type = UDP_TUNNEL_TYPE_VXLAN; + + if (push) + udp_tunnel_push_rx_port(dev, vs->sock, type); + else + udp_tunnel_drop_rx_port(dev, vs->sock, type); + } } spin_unlock(&vn->sock_lock); } @@ -3630,10 +3638,15 @@ static int vxlan_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); - if (event == NETDEV_UNREGISTER) + if (event == NETDEV_UNREGISTER) { + vxlan_offload_rx_ports(dev, false); vxlan_handle_lowerdev_unregister(vn, dev); - else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) - vxlan_push_rx_ports(dev); + } else if (event == NETDEV_REGISTER) { + vxlan_offload_rx_ports(dev, true); + } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || + event == NETDEV_UDP_TUNNEL_DROP_INFO) { + vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); + } return NOTIFY_DONE; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c index 1447a8352383..2d3e5e263a32 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c @@ -78,10 +78,7 @@ int brcmf_debug_attach(struct brcmf_pub *drvr) return -ENODEV; drvr->dbgfs_dir = debugfs_create_dir(dev_name(dev), root_folder); - if (IS_ERR(drvr->dbgfs_dir)) - return PTR_ERR(drvr->dbgfs_dir); - - return 0; + return PTR_ERR_OR_ZERO(drvr->dbgfs_dir); } void brcmf_debug_detach(struct brcmf_pub *drvr) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 84143a02adce..54201c02fdb8 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7837,7 +7837,7 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) { struct airo_info *ai = dev->ml_priv; int ridcode; int enabled; - static int (* writer)(struct airo_info *, u16 rid, const void *, int, int); + int (*writer)(struct airo_info *, u16 rid, const void *, int, int); unsigned char *iobuf; /* Only super-user can write RIDs */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index aaaca4d08e2b..ccbe74589eec 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -4324,7 +4324,7 @@ static struct attribute *ipw2100_sysfs_entries[] = { NULL, }; -static struct attribute_group ipw2100_attribute_group = { +static const struct attribute_group ipw2100_attribute_group = { .attrs = ipw2100_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 9368abdf18e2..c311b1a994c1 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -11500,7 +11500,7 @@ static struct attribute *ipw_sysfs_entries[] = { NULL }; -static struct attribute_group ipw_attribute_group = { +static const struct attribute_group ipw_attribute_group = { .name = NULL, /* put in device directory */ .attrs = ipw_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 38bf403bb1e1..329f3a63dadd 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -3464,7 +3464,7 @@ static struct attribute *il3945_sysfs_entries[] = { NULL }; -static struct attribute_group il3945_attribute_group = { +static const struct attribute_group il3945_attribute_group = { .name = NULL, /* put in device directory */ .attrs = il3945_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 5b51fba75595..de9b6522c43f 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4654,7 +4654,7 @@ static struct attribute *il_sysfs_entries[] = { NULL }; -static struct attribute_group il_attribute_group = { +static const struct attribute_group il_attribute_group = { .name = NULL, /* put in device directory */ .attrs = il_sysfs_entries, }; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c index 55f238a2a310..c58393eab6a1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c @@ -478,7 +478,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index) struct rtl_priv *rtlpriv = rtl_priv(hw); u16 read_point = 0, write_point = 0, remind_cnt = 0; u32 tmp_4byte = 0; - static u16 last_read_point; static bool start_rx; tmp_4byte = rtl_read_dword(rtlpriv, REG_RXQ_TXBD_IDX); @@ -506,7 +505,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index) rtlpci->rx_ring[queue_index].next_rx_rp = write_point; - last_read_point = read_point; return remind_cnt; } @@ -917,7 +915,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, struct rtl_priv *rtlpriv = rtl_priv(hw); u16 cur_tx_rp = 0; u16 cur_tx_wp = 0; - static u16 last_txw_point; static bool over_run; u32 tmp = 0; u8 q_idx = *val; @@ -951,9 +948,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, rtl_write_word(rtlpriv, get_desc_addr_fr_q_idx(q_idx), ring->cur_tx_wp); - - if (q_idx == 1) - last_txw_point = cur_tx_wp; } if (ring->avl_desc < (max_tx_desc - 15)) { diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index e0dbd6e48a98..a0d27c04e22f 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -422,13 +422,11 @@ int of_phy_register_fixed_link(struct device_node *np) struct fixed_phy_status status = {}; struct device_node *fixed_link_node; const __be32 *fixed_link_prop; - int link_gpio; - int len, err; struct phy_device *phy; const char *managed; + int link_gpio, len; - err = of_property_read_string(np, "managed", &managed); - if (err == 0) { + if (of_property_read_string(np, "managed", &managed) == 0) { if (strcmp(managed, "in-band-status") == 0) { /* status is zeroed, namely its .link member */ phy = fixed_phy_register(PHY_POLL, &status, -1, np); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5e1b548828e6..9aaa177e8209 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -391,7 +391,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, vq->desc_state[head].data = data; if (indirect) vq->desc_state[head].indir_desc = desc; - if (ctx) + else vq->desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 100b207efc9e..c05f1f1c0d41 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> -#include <rxrpc/packet.h> #include "internal.h" #include "afs_fs.h" diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 02781e78ffb6..10743043d431 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -14,7 +14,6 @@ #include <net/sock.h> #include <net/af_rxrpc.h> -#include <rxrpc/packet.h> #include "internal.h" #include "afs_cm.h" diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5869ff..6353c7474dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } + +static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} + +static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __dev_map_flush(struct bpf_map *map) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3d137c33d664..b1e1035ca24b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#endif diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 83cc9863444b..afdbb701fdb4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -374,5 +374,9 @@ struct ethtool_ops { struct ethtool_link_ksettings *); int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); + int (*get_fecparam)(struct net_device *, + struct ethtool_fecparam *); + int (*set_fecparam)(struct net_device *, + struct ethtool_fecparam *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index bfef1e5734f8..d19ed3c15e1e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -711,7 +711,21 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the + * same cpu context. Further for best results no more than a single map + * for the do_redirect/do_flush pair should be used. This limitation is + * because we only track one map and force a flush when the map changes. + * This does not appear to be a real limitation for existing software. + */ +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); +int xdp_do_redirect(struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *prog); +void xdp_do_flush_map(void); + void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_redirect(u32 ifindex); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1d4737cffc71..dc8b4896b77b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -36,7 +36,6 @@ enum { /**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */ NETIF_F_TSO_BIT /* ... TCPv4 segmentation */ = NETIF_F_GSO_SHIFT, - NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ @@ -76,6 +75,7 @@ enum { NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ + NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ /* * Add your fresh new feature above and remember to update @@ -118,7 +118,6 @@ enum { #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) #define NETIF_F_TSO __NETIF_F(TSO) -#define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) @@ -140,6 +139,7 @@ enum { #define NETIF_F_HW_TC __NETIF_F(HW_TC) #define NETIF_F_HW_ESP __NETIF_F(HW_ESP) #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) +#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) @@ -172,7 +172,7 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ NETIF_F_GSO_SCTP) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..3a3cdc1b1f31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -66,6 +66,7 @@ struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; struct bpf_prog; +struct xdp_buff; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1138,7 +1139,12 @@ struct xfrmdev_ops { * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); * This function is used to set or query state related to XDP on the * netdevice. See definition of enum xdp_netdev_command for details. - * + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * This function is used to submit a XDP packet for transmit on a + * netdevice. + * void (*ndo_xdp_flush)(struct net_device *dev); + * This function is used to inform the driver to flush a paticular + * xpd tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1323,6 +1329,9 @@ struct net_device_ops { int needed_headroom); int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + int (*ndo_xdp_xmit)(struct net_device *dev, + struct xdp_buff *xdp); + void (*ndo_xdp_flush)(struct net_device *dev); }; /** @@ -2308,6 +2317,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D #define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); @@ -2423,8 +2433,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); -int dev_close(struct net_device *dev); -int dev_close_many(struct list_head *head, bool unlink); +void dev_close(struct net_device *dev); +void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); @@ -4089,7 +4099,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h new file mode 100644 index 000000000000..8a5f9f0b2c52 --- /dev/null +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -0,0 +1,13 @@ +#ifndef __MDIO_BCM_UNIMAC_PDATA_H +#define __MDIO_BCM_UNIMAC_PDATA_H + +struct unimac_mdio_pdata { + u32 phy_mask; + int (*wait_func)(void *data); + void *wait_func_data; + const char *bus_name; +}; + +#define UNIMAC_MDIO_DRV_NAME "unimac-mdio" + +#endif /* __MDIO_BCM_UNIMAC_PDATA_H */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 0eef0a2b1901..d60de4a39810 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -323,6 +323,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, bool en_searcher); + int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ef39c7f40ae6..cc646ca97974 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -161,6 +161,18 @@ enum qed_nvm_images { QED_NVM_IMAGE_FCOE_CFG, }; +struct qed_link_eee_params { + u32 tx_lpi_timer; +#define QED_EEE_1G_ADV BIT(0) +#define QED_EEE_10G_ADV BIT(1) + + /* Capabilities are represented using QED_EEE_*_ADV values */ + u8 adv_caps; + u8 lp_adv_caps; + bool enable; + bool tx_lpi_enable; +}; + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, @@ -172,8 +184,9 @@ enum qed_led_mode { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) -#define QED_COALESCE_MAX 0xFF +#define QED_COALESCE_MAX 0x1FF #define QED_DEFAULT_RX_USECS 12 +#define QED_DEFAULT_TX_USECS 48 /* forward */ struct qed_dev; @@ -408,6 +421,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) #define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) +#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) u32 override_flags; bool autoneg; u32 adv_speeds; @@ -422,6 +436,7 @@ struct qed_link_params { #define QED_LINK_LOOPBACK_EXT BIT(3) #define QED_LINK_LOOPBACK_MAC BIT(4) u32 loopback_mode; + struct qed_link_eee_params eee; }; struct qed_link_output { @@ -437,6 +452,12 @@ struct qed_link_output { u8 port; /* In PORT defs */ bool autoneg; u32 pause_config; + + /* EEE - capability & param */ + bool eee_supported; + bool eee_active; + u8 sup_caps; + struct qed_link_eee_params eee; }; struct qed_probe_params { @@ -654,16 +675,6 @@ struct qed_common_ops { enum qed_nvm_images type, u8 *buf, u16 len); /** - * @brief get_coalesce - Get coalesce parameters in usec - * - * @param cdev - * @param rx_coal - Rx coalesce value in usec - * @param tx_coal - Tx coalesce value in usec - * - */ - void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal); - -/** * @brief set_coalesce - Configure Rx coalesce value in usec * * @param cdev @@ -674,8 +685,8 @@ struct qed_common_ops { * * @return 0 on success, error otherwise. */ - int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u16 qid, u16 sb_id); + int (*set_coalesce)(struct qed_dev *cdev, + u16 rx_coal, u16 tx_coal, void *handle); /** * @brief set_led - Configure LED mode diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 99e866487e2f..ba007163acfd 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -273,87 +273,85 @@ struct sctp_init_chunk { /* Section 3.3.2.1. IPv4 Address Parameter (5) */ -typedef struct sctp_ipv4addr_param { +struct sctp_ipv4addr_param { struct sctp_paramhdr param_hdr; - struct in_addr addr; -} sctp_ipv4addr_param_t; + struct in_addr addr; +}; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ -typedef struct sctp_ipv6addr_param { +struct sctp_ipv6addr_param { struct sctp_paramhdr param_hdr; struct in6_addr addr; -} sctp_ipv6addr_param_t; +}; /* Section 3.3.2.1 Cookie Preservative (9) */ -typedef struct sctp_cookie_preserve_param { +struct sctp_cookie_preserve_param { struct sctp_paramhdr param_hdr; - __be32 lifespan_increment; -} sctp_cookie_preserve_param_t; + __be32 lifespan_increment; +}; /* Section 3.3.2.1 Host Name Address (11) */ -typedef struct sctp_hostname_param { +struct sctp_hostname_param { struct sctp_paramhdr param_hdr; uint8_t hostname[0]; -} sctp_hostname_param_t; +}; /* Section 3.3.2.1 Supported Address Types (12) */ -typedef struct sctp_supported_addrs_param { +struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; __be16 types[0]; -} sctp_supported_addrs_param_t; - -/* Appendix A. ECN Capable (32768) */ -typedef struct sctp_ecn_capable_param { - struct sctp_paramhdr param_hdr; -} sctp_ecn_capable_param_t; +}; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ -typedef struct sctp_adaptation_ind_param { +struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} sctp_adaptation_ind_param_t; +}; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ -typedef struct sctp_supported_ext_param { +struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_supported_ext_param_t; +}; /* AUTH Section 3.1 Random */ -typedef struct sctp_random_param { +struct sctp_random_param { struct sctp_paramhdr param_hdr; __u8 random_val[0]; -} sctp_random_param_t; +}; /* AUTH Section 3.2 Chunk List */ -typedef struct sctp_chunks_param { +struct sctp_chunks_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_chunks_param_t; +}; /* AUTH Section 3.3 HMAC Algorithm */ -typedef struct sctp_hmac_algo_param { +struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; -} sctp_hmac_algo_param_t; +}; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP * association. */ -typedef struct sctp_init_chunk sctp_initack_chunk_t; +struct sctp_initack_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_inithdr init_hdr; +}; /* Section 3.3.3.1 State Cookie (7) */ -typedef struct sctp_cookie_param { +struct sctp_cookie_param { struct sctp_paramhdr p; __u8 body[0]; -} sctp_cookie_param_t; +}; /* Section 3.3.3.1 Unrecognized Parameters (8) */ -typedef struct sctp_unrecognized_param { +struct sctp_unrecognized_param { struct sctp_paramhdr param_hdr; struct sctp_paramhdr unrecognized; -} sctp_unrecognized_param_t; +}; @@ -365,30 +363,28 @@ typedef struct sctp_unrecognized_param { * subsequences of DATA chunks as represented by their TSNs. */ -typedef struct sctp_gap_ack_block { +struct sctp_gap_ack_block { __be16 start; __be16 end; -} sctp_gap_ack_block_t; - -typedef __be32 sctp_dup_tsn_t; +}; -typedef union { - sctp_gap_ack_block_t gab; - sctp_dup_tsn_t dup; -} sctp_sack_variable_t; +union sctp_sack_variable { + struct sctp_gap_ack_block gab; + __be32 dup; +}; -typedef struct sctp_sackhdr { +struct sctp_sackhdr { __be32 cum_tsn_ack; __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - sctp_sack_variable_t variable[0]; -} sctp_sackhdr_t; + union sctp_sack_variable variable[0]; +}; -typedef struct sctp_sack_chunk { +struct sctp_sack_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_sackhdr_t sack_hdr; -} sctp_sack_chunk_t; + struct sctp_sackhdr sack_hdr; +}; /* RFC 2960. Section 3.3.5 Heartbeat Request (HEARTBEAT) (4): @@ -398,23 +394,23 @@ typedef struct sctp_sack_chunk { * the present association. */ -typedef struct sctp_heartbeathdr { +struct sctp_heartbeathdr { struct sctp_paramhdr info; -} sctp_heartbeathdr_t; +}; -typedef struct sctp_heartbeat_chunk { +struct sctp_heartbeat_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_heartbeathdr_t hb_hdr; -} sctp_heartbeat_chunk_t; + struct sctp_heartbeathdr hb_hdr; +}; /* For the abort and shutdown ACK we must carry the init tag in the * common header. Just the common header is all that is needed with a * chunk descriptor. */ -typedef struct sctp_abort_chunk { +struct sctp_abort_chunk { struct sctp_chunkhdr uh; -} sctp_abort_chunk_t; +}; /* For the graceful shutdown we must carry the tag (in common header) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..4093552be1de 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,39 +463,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 @@ -945,12 +944,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 542ca1ae02c4..d7389ea36e10 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -148,12 +148,6 @@ struct tcp_sock { u16 gso_segs; /* Max number of segs per GSO packet */ /* - * Header prediction flags - * 0x5?10 << 16 + snd_wnd in net byte order - */ - __be32 pred_flags; - -/* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. @@ -192,15 +186,6 @@ struct tcp_sock { struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - /* Data for direct copy to user */ - struct { - struct sk_buff_head prequeue; - struct task_struct *task; - struct msghdr *msg; - int memory; - int len; - } ucopy; - u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5209b5ed2a64..32fb046f2173 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; default: return -EINVAL; } @@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 53b1a2cca421..afb37f835449 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,7 +58,6 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned char recursion_level; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 diff --git a/include/net/dsa.h b/include/net/dsa.h index 58969b9a090c..88da272d20d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } -static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) -{ - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; -} - static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include <linux/interrupt.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/notifier.h> - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f2a215fc78e4..950ed182f62f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -33,18 +33,12 @@ struct inetpeer_addr { }; struct inet_peer { - /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer __rcu *avl_left, *avl_right; + struct rb_node rb_node; struct inetpeer_addr daddr; - __u32 avl_height; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - union { - struct list_head gc_list; - struct rcu_head gc_rcu; - }; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid @@ -55,7 +49,6 @@ struct inet_peer { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; - struct inet_peer *gc_next; }; /* following fields might be frequently dirtied */ @@ -64,7 +57,7 @@ struct inet_peer { }; struct inet_peer_base { - struct inet_peer __rcu *root; + struct rb_root rb_root; seqlock_t lock; int total; }; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 199056933dcb..907d39a42f6b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen != 128 && + (rt->rt6i_dst.plen < 127 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..82dd298b40c7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -178,6 +178,7 @@ enum { NLA_S16, NLA_S32, NLA_S64, + NLA_BITFIELD32, __NLA_TYPE_MAX, }; @@ -206,6 +207,7 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" + * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute * All other Minimum length of attribute payload * * Example: @@ -213,11 +215,13 @@ enum { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ struct nla_policy { u16 type; u16 len; + void *validation_data; }; /** @@ -1203,6 +1207,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) } /** + * nla_get_bitfield32 - return payload of 32 bitfield attribute + * @nla: nla_bitfield32 attribute + */ +static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) +{ + struct nla_bitfield32 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + return tmp; +} + +/** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from * @gfp: GFP mask diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include <linux/workqueue.h> #include <linux/xfrm.h> #include <net/dst_ops.h> -#include <net/flowcache.h> struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4679e7a5ed5..1d5f6ff3f440 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -135,7 +135,7 @@ typedef union { struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; } sctp_arg_t; @@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) -SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) +SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) static inline sctp_arg_t SCTP_FORCE(void) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5ab29af8ca8a..66cd7639b912 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1556,9 +1556,9 @@ struct sctp_association { * and authenticated chunk list. All that is part of the * cookie and these are just pointers to those locations */ - sctp_random_param_t *peer_random; - sctp_chunks_param_t *peer_chunks; - sctp_hmac_algo_param_t *peer_hmacs; + struct sctp_random_param *peer_random; + struct sctp_chunks_param *peer_chunks; + struct sctp_hmac_algo_param *peer_hmacs; } peer; /* State : A state variable indicating what state the diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..3ecb62811004 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ -#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) @@ -257,7 +256,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; -extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; @@ -363,7 +361,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); + const struct tcphdr *th); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); @@ -633,29 +631,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { @@ -905,9 +880,8 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ - CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ - CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ + CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ + CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ }; /* @@ -1245,17 +1219,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __tcp_checksum_complete(skb); } -/* Prequeue for VJ style copy to user, combined with checksumming. */ - -static inline void tcp_prequeue_init(struct tcp_sock *tp) -{ - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - tp->ucopy.memory = 0; - skb_queue_head_init(&tp->ucopy.prequeue); -} - -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 02c5be037451..10cce0dd4450 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -115,6 +115,8 @@ struct udp_tunnel_info { /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); @@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev) call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; @@ -563,7 +564,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +978,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1b61357d3f57..7b1eb7b4be41 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -12,7 +12,8 @@ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ - FN(TX) + FN(TX) \ + FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); @@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) ); +TRACE_EVENT(xdp_redirect, + + TP_PROTO(const struct net_device *from, + const struct net_device *to, + const struct bpf_prog *xdp, u32 act), + + TP_ARGS(from, to, xdp, act), + + TP_STRUCT__entry( + __string(name_from, from->name) + __string(name_to, to->name) + __array(u8, prog_tag, 8) + __field(u32, act) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); + memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __assign_str(name_from, from->name); + __assign_str(name_to, to->name); + __entry->act = act; + ), + + TP_printk("prog=%s from=%s to=%s action=%s", + __print_hex_str(__entry->prog_tag, 8), + __get_str(name_from), __get_str(name_to), + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) +); #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6f8b37..1106a8c4cd36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { @@ -347,6 +348,11 @@ union bpf_attr { * @flags: bit 0 - if set, redirect to ingress instead of egress * other bits - reserved * Return: TC_ACT_REDIRECT + * int bpf_redirect_map(key, map, flags) + * redirect to endpoint in map + * @key: index in map to lookup + * @map: fd of map to do lookup in + * @flags: -- * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -591,7 +597,8 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -717,6 +724,7 @@ enum xdp_action { XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7d4a594d5d58..9c041dae8e2c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op { char data[]; }; +/** + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM + * @active_fec: FEC mode which is active on porte + * @fec: Bitmask of supported/configured FEC modes + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + */ +struct ethtool_fecparam { + __u32 cmd; + /* bitmask of FEC modes */ + __u32 active_fec; + __u32 fec; + __u32 reserved; +}; + +/** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver + * @ETHTOOL_FEC_OFF: No FEC Mode + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + */ +enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, + ETHTOOL_FEC_AUTO_BIT, + ETHTOOL_FEC_OFF_BIT, + ETHTOOL_FEC_RS_BIT, + ETHTOOL_FEC_BASER_BIT, +}; + +#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) +#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) +#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) +#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) +#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. * Please use ETHTOOL_GLINKSETTINGS @@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op { #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ #define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ #define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ +#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ +#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET @@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + = ETHTOOL_LINK_MODE_FEC_BASER_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f86127a46cfc..f4fc9c9e123d 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -226,5 +226,22 @@ struct nlattr { #define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; #endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d148505010a7..dab7dad9e01a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -683,10 +683,29 @@ struct tcamsg { unsigned char tca__pad1; unsigned short tca__pad2; }; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + #define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) -#define TCA_ACT_TAB 1 /* attr type must be >=1 */ -#define TCAA_MAX 1 +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO + * actions in a dump. All dump responses will contain the number of actions + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) diff --git a/include/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 7343f71783dc..9656aad8f8f7 100644 --- a/include/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -1,17 +1,18 @@ -/* AF_RXRPC parameters +/* Types and definitions for AF_RXRPC. * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License + * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * 2 of the Licence, or (at your option) any later version. */ -#ifndef _LINUX_RXRPC_H -#define _LINUX_RXRPC_H +#ifndef _UAPI_LINUX_RXRPC_H +#define _UAPI_LINUX_RXRPC_H +#include <linux/types.h> #include <linux/in.h> #include <linux/in6.h> @@ -76,4 +77,48 @@ enum rxrpc_cmsg_type { #define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ #define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ -#endif /* _LINUX_RXRPC_H */ +/* + * RxRPC-level abort codes + */ +#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ +#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ +#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ +#define RX_EOF -4 /* unexpected end of data on read op */ +#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ +#define RX_USER_ABORT -6 /* generic user abort */ +#define RX_ADDRINUSE -7 /* UDP port in use */ +#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ + +/* + * (un)marshalling abort codes (rxgen) + */ +#define RXGEN_CC_MARSHAL -450 +#define RXGEN_CC_UNMARSHAL -451 +#define RXGEN_SS_MARSHAL -452 +#define RXGEN_SS_UNMARSHAL -453 +#define RXGEN_DECODE -454 +#define RXGEN_OPCODE -455 +#define RXGEN_SS_XDRFREE -456 +#define RXGEN_CC_XDRFREE -457 + +/* + * Rx kerberos security abort codes + * - unfortunately we have no generalised security abort codes to say things + * like "unsupported security", so we have to use these instead and hope the + * other side understands + */ +#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ +#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ +#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ +#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ +#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ +#define RXKADNOAUTH 19270405 /* caller not authorised */ +#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ +#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ +#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ +#define RXKADEXPIRED 19270409 /* authentication expired */ +#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ +#define RXKADDATALEN 19270411 /* user data too long */ +#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ + +#endif /* _UAPI_LINUX_RXRPC_H */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index d85693295798..b3f346fb9fe3 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,14 +184,7 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ - LINUX_MIB_TCPPREQUEUED, /* TCPPrequeued */ - LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, /* TCPDirectCopyFromBacklog */ - LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, /* TCPDirectCopyFromPrequeue */ - LINUX_MIB_TCPPREQUEUEDROPPED, /* TCPPrequeueDropped */ - LINUX_MIB_TCPHPHITS, /* TCPHPHits */ - LINUX_MIB_TCPHPHITSTOUSER, /* TCPHPHitsToUser */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ - LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ @@ -208,14 +201,12 @@ enum LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */ LINUX_MIB_TCPLOSSFAILURES, /* TCPLossFailures */ LINUX_MIB_TCPFASTRETRANS, /* TCPFastRetrans */ - LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ - LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ LINUX_MIB_TCPRCVCOLLAPSED, /* TCPRcvCollapsed */ LINUX_MIB_TCPDSACKOLDSENT, /* TCPDSACKOldSent */ LINUX_MIB_TCPDSACKOFOSENT, /* TCPDSACKOfoSent */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index a5507c977497..030e594bab45 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -231,6 +231,14 @@ enum { TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ + TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */ + TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */ + TCP_NLA_SND_CWND, /* Sending congestion window */ + TCP_NLA_REORDERING, /* Reordering metric */ + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + }; /* for TCP_MD5SIG socket option */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e1e5e658f2db..48e92705be59 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,6 +2,9 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +ifeq ($(CONFIG_NET),y) +obj-$(CONFIG_BPF_SYSCALL) += devmap.o +endif ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c new file mode 100644 index 000000000000..d439ee0eadb1 --- /dev/null +++ b/kernel/bpf/devmap.c @@ -0,0 +1,432 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* Devmaps primary use is as a backend map for XDP BPF helper call + * bpf_redirect_map(). Because XDP is mostly concerned with performance we + * spent some effort to ensure the datapath with redirect maps does not use + * any locking. This is a quick note on the details. + * + * We have three possible paths to get into the devmap control plane bpf + * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall + * will invoke an update, delete, or lookup operation. To ensure updates and + * deletes appear atomic from the datapath side xchg() is used to modify the + * netdev_map array. Then because the datapath does a lookup into the netdev_map + * array (read-only) from an RCU critical section we use call_rcu() to wait for + * an rcu grace period before free'ing the old data structures. This ensures the + * datapath always has a valid copy. However, the datapath does a "flush" + * operation that pushes any pending packets in the driver outside the RCU + * critical section. Each bpf_dtab_netdev tracks these pending operations using + * an atomic per-cpu bitmap. The bpf_dtab_netdev object will not be destroyed + * until all bits are cleared indicating outstanding flush operations have + * completed. + * + * BPF syscalls may race with BPF program calls on any of the update, delete + * or lookup operations. As noted above the xchg() operation also keep the + * netdev_map consistent in this case. From the devmap side BPF programs + * calling into these operations are the same as multiple user space threads + * making system calls. + * + * Finally, any of the above may race with a netdev_unregister notifier. The + * unregister notifier must search for net devices in the map structure that + * contain a reference to the net device and remove them. This is a two step + * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b) + * check to see if the ifindex is the same as the net_device being removed. + * Unfortunately, the xchg() operations do not protect against this. To avoid + * potentially removing incorrect objects the dev_map_list_mutex protects + * conflicting netdev unregister and BPF syscall operations. Updates and + * deletes from a BPF program (done in rcu critical section) are blocked + * because of this mutex. + */ +#include <linux/bpf.h> +#include <linux/jhash.h> +#include <linux/filter.h> +#include <linux/rculist_nulls.h> +#include "percpu_freelist.h" +#include "bpf_lru_list.h" +#include "map_in_map.h" + +struct bpf_dtab_netdev { + struct net_device *dev; + int key; + struct rcu_head rcu; + struct bpf_dtab *dtab; +}; + +struct bpf_dtab { + struct bpf_map map; + struct bpf_dtab_netdev **netdev_map; + unsigned long int __percpu *flush_needed; + struct list_head list; +}; + +static DEFINE_MUTEX(dev_map_list_mutex); +static LIST_HEAD(dev_map_list); + +static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +{ + struct bpf_dtab *dtab; + u64 cost; + int err; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != 4 || attr->map_flags) + return ERR_PTR(-EINVAL); + + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + if (attr->value_size > KMALLOC_MAX_SIZE) + return ERR_PTR(-E2BIG); + + dtab = kzalloc(sizeof(*dtab), GFP_USER); + if (!dtab) + return ERR_PTR(-ENOMEM); + + /* mandatory map attributes */ + dtab->map.map_type = attr->map_type; + dtab->map.key_size = attr->key_size; + dtab->map.value_size = attr->value_size; + dtab->map.max_entries = attr->max_entries; + dtab->map.map_flags = attr->map_flags; + + err = -ENOMEM; + + /* make sure page count doesn't overflow */ + cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); + cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_dtab; + + dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + /* if map size is larger than memlock limit, reject it early */ + err = bpf_map_precharge_memlock(dtab->map.pages); + if (err) + goto free_dtab; + + err = -ENOMEM; + /* A per cpu bitfield with a bit per possible net device */ + dtab->flush_needed = __alloc_percpu( + BITS_TO_LONGS(attr->max_entries) * + sizeof(unsigned long), + __alignof__(unsigned long)); + if (!dtab->flush_needed) + goto free_dtab; + + dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * + sizeof(struct bpf_dtab_netdev *)); + if (!dtab->netdev_map) + goto free_dtab; + + mutex_lock(&dev_map_list_mutex); + list_add_tail(&dtab->list, &dev_map_list); + mutex_unlock(&dev_map_list_mutex); + return &dtab->map; + +free_dtab: + free_percpu(dtab->flush_needed); + kfree(dtab); + return ERR_PTR(err); +} + +static void dev_map_free(struct bpf_map *map) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + int i, cpu; + + /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete. The rcu critical section only guarantees + * no further reads against netdev_map. It does __not__ ensure pending + * flush operations (if any) are complete. + */ + synchronize_rcu(); + + /* To ensure all pending flush operations have completed wait for flush + * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. + * Because the above synchronize_rcu() ensures the map is disconnected + * from the program we can assume no new bits will be set. + */ + for_each_online_cpu(cpu) { + unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu); + + while (!bitmap_empty(bitmap, dtab->map.max_entries)) + cpu_relax(); + } + + /* Although we should no longer have datapath or bpf syscall operations + * at this point we we can still race with netdev notifier, hence the + * lock. + */ + mutex_lock(&dev_map_list_mutex); + for (i = 0; i < dtab->map.max_entries; i++) { + struct bpf_dtab_netdev *dev; + + dev = dtab->netdev_map[i]; + if (!dev) + continue; + + dev_put(dev->dev); + kfree(dev); + } + + /* At this point bpf program is detached and all pending operations + * _must_ be complete + */ + list_del(&dtab->list); + mutex_unlock(&dev_map_list_mutex); + free_percpu(dtab->flush_needed); + bpf_map_area_free(dtab->netdev_map); + kfree(dtab); +} + +static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 index = key ? *(u32 *)key : U32_MAX; + u32 *next = (u32 *)next_key; + + if (index >= dtab->map.max_entries) { + *next = 0; + return 0; + } + + if (index == dtab->map.max_entries - 1) + return -ENOENT; + + *next = index + 1; + return 0; +} + +void __dev_map_insert_ctx(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); + + __set_bit(key, bitmap); +} + +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev; + + if (key >= map->max_entries) + return NULL; + + dev = READ_ONCE(dtab->netdev_map[key]); + return dev ? dev->dev : NULL; +} + +/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled + * from the driver before returning from its napi->poll() routine. The poll() + * routine is called either from busy_poll context or net_rx_action signaled + * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the + * net device can be torn down. On devmap tear down we ensure the ctx bitmap + * is zeroed before completing to ensure all flush operations have completed. + */ +void __dev_map_flush(struct bpf_map *map) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); + u32 bit; + + for_each_set_bit(bit, bitmap, map->max_entries) { + struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); + struct net_device *netdev; + + /* This is possible if the dev entry is removed by user space + * between xdp redirect and flush op. + */ + if (unlikely(!dev)) + continue; + + netdev = dev->dev; + + __clear_bit(bit, bitmap); + if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush)) + continue; + + netdev->netdev_ops->ndo_xdp_flush(netdev); + } +} + +/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or + * update happens in parallel here a dev_put wont happen until after reading the + * ifindex. + */ +static void *dev_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev; + u32 i = *(u32 *)key; + + if (i >= map->max_entries) + return NULL; + + dev = READ_ONCE(dtab->netdev_map[i]); + return dev ? &dev->dev->ifindex : NULL; +} + +static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev) +{ + if (old_dev->dev->netdev_ops->ndo_xdp_flush) { + struct net_device *fl = old_dev->dev; + unsigned long *bitmap; + int cpu; + + for_each_online_cpu(cpu) { + bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu); + __clear_bit(old_dev->key, bitmap); + + fl->netdev_ops->ndo_xdp_flush(old_dev->dev); + } + } +} + +static void __dev_map_entry_free(struct rcu_head *rcu) +{ + struct bpf_dtab_netdev *old_dev; + + old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu); + dev_map_flush_old(old_dev); + dev_put(old_dev->dev); + kfree(old_dev); +} + +static int dev_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *old_dev; + int k = *(u32 *)key; + + if (k >= map->max_entries) + return -EINVAL; + + /* Use synchronize_rcu() here to ensure any rcu critical sections + * have completed, but this does not guarantee a flush has happened + * yet. Because driver side rcu_read_lock/unlock only protects the + * running XDP program. However, for pending flush operations the + * dev and ctx are stored in another per cpu map. And additionally, + * the driver tear down ensures all soft irqs are complete before + * removing the net device in the case of dev_put equals zero. + */ + mutex_lock(&dev_map_list_mutex); + old_dev = xchg(&dtab->netdev_map[k], NULL); + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + mutex_unlock(&dev_map_list_mutex); + return 0; +} + +static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct net *net = current->nsproxy->net_ns; + struct bpf_dtab_netdev *dev, *old_dev; + u32 i = *(u32 *)key; + u32 ifindex = *(u32 *)value; + + if (unlikely(map_flags > BPF_EXIST)) + return -EINVAL; + + if (unlikely(i >= dtab->map.max_entries)) + return -E2BIG; + + if (unlikely(map_flags == BPF_NOEXIST)) + return -EEXIST; + + if (!ifindex) { + dev = NULL; + } else { + dev = kmalloc(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN); + if (!dev) + return -ENOMEM; + + dev->dev = dev_get_by_index(net, ifindex); + if (!dev->dev) { + kfree(dev); + return -EINVAL; + } + + dev->key = i; + dev->dtab = dtab; + } + + /* Use call_rcu() here to ensure rcu critical sections have completed + * Remembering the driver side flush operation will happen before the + * net device is removed. + */ + mutex_lock(&dev_map_list_mutex); + old_dev = xchg(&dtab->netdev_map[i], dev); + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + mutex_unlock(&dev_map_list_mutex); + + return 0; +} + +const struct bpf_map_ops dev_map_ops = { + .map_alloc = dev_map_alloc, + .map_free = dev_map_free, + .map_get_next_key = dev_map_get_next_key, + .map_lookup_elem = dev_map_lookup_elem, + .map_update_elem = dev_map_update_elem, + .map_delete_elem = dev_map_delete_elem, +}; + +static int dev_map_notification(struct notifier_block *notifier, + ulong event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct bpf_dtab *dtab; + int i; + + switch (event) { + case NETDEV_UNREGISTER: + mutex_lock(&dev_map_list_mutex); + list_for_each_entry(dtab, &dev_map_list, list) { + for (i = 0; i < dtab->map.max_entries; i++) { + struct bpf_dtab_netdev *dev; + + dev = dtab->netdev_map[i]; + if (!dev || + dev->dev->ifindex != netdev->ifindex) + continue; + dev = xchg(&dtab->netdev_map[i], NULL); + if (dev) + call_rcu(&dev->rcu, + __dev_map_entry_free); + } + } + mutex_unlock(&dev_map_list_mutex); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block dev_map_notifier = { + .notifier_call = dev_map_notification, +}; + +static int __init dev_map_init(void) +{ + register_netdevice_notifier(&dev_map_notifier); + return 0; +} + +subsys_initcall(dev_map_init); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 664d93972373..f6e8b3887eab 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1283,6 +1283,14 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + /* devmap returns a pointer to a live net_device ifindex that we cannot + * allow to be modified from bpf side. So do not allow lookup elements + * for now. + */ + case BPF_MAP_TYPE_DEVMAP: + if (func_id != BPF_FUNC_redirect_map) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) @@ -1311,6 +1319,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; break; + case BPF_FUNC_redirect_map: + if (map->map_type != BPF_MAP_TYPE_DEVMAP) + goto error; + break; default: break; } diff --git a/lib/nlattr.c b/lib/nlattr.c index fb52435be42d..ee79b7a3c6b0 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -27,6 +27,30 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S64] = sizeof(s64), }; +static int validate_nla_bitfield32(const struct nlattr *nla, + u32 *valid_flags_allowed) +{ + const struct nla_bitfield32 *bf = nla_data(nla); + u32 *valid_flags_mask = valid_flags_allowed; + + if (!valid_flags_allowed) + return -EINVAL; + + /*disallow invalid bit selector */ + if (bf->selector & ~*valid_flags_mask) + return -EINVAL; + + /*disallow invalid bit values */ + if (bf->value & ~*valid_flags_mask) + return -EINVAL; + + /*disallow valid bit values that are not selected*/ + if (bf->value & ~bf->selector) + return -EINVAL; + + return 0; +} + static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy) { @@ -46,6 +70,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype, return -ERANGE; break; + case NLA_BITFIELD32: + if (attrlen != sizeof(struct nla_bitfield32)) + return -ERANGE; + + return validate_nla_bitfield32(nla, pt->validation_data); + case NLA_NUL_STRING: if (pt->len) minlen = min_t(int, attrlen, pt->len + 1); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index ab3b654b05cc..4e2576fc0c59 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct lowpan_peer *peer) { const u8 *saddr; - struct lowpan_btle_dev *dev; - - dev = lowpan_btle_dev(netdev); saddr = peer->lladdr; @@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev) static void ifdown(struct net_device *netdev) { - int err; - rtnl_lock(); - err = dev_close(netdev); - if (err < 0) - BT_INFO("iface %s cannot be closed (%d)", netdev->name, err); + dev_close(netdev); rtnl_unlock(); } diff --git a/net/core/Makefile b/net/core/Makefile index 79f9479e9658..d501c4278015 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o -obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/dev.c b/net/core/dev.c index 8515f8fe0460..8ea6b4b42611 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -144,6 +144,7 @@ #include <linux/netfilter_ingress.h> #include <linux/crash_dump.h> #include <linux/sctp.h> +#include <net/udp_tunnel.h> #include "net-sysfs.h" @@ -1413,7 +1414,7 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close_many(struct list_head *head) +static void __dev_close_many(struct list_head *head) { struct net_device *dev; @@ -1455,23 +1456,18 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; netpoll_poll_enable(dev); } - - return 0; } -static int __dev_close(struct net_device *dev) +static void __dev_close(struct net_device *dev) { - int retval; LIST_HEAD(single); list_add(&dev->close_list, &single); - retval = __dev_close_many(&single); + __dev_close_many(&single); list_del(&single); - - return retval; } -int dev_close_many(struct list_head *head, bool unlink) +void dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1488,8 +1484,6 @@ int dev_close_many(struct list_head *head, bool unlink) if (unlink) list_del_init(&dev->close_list); } - - return 0; } EXPORT_SYMBOL(dev_close_many); @@ -1502,7 +1496,7 @@ EXPORT_SYMBOL(dev_close_many); * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ -int dev_close(struct net_device *dev) +void dev_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1511,7 +1505,6 @@ int dev_close(struct net_device *dev) dev_close_many(&single, true); list_del(&single); } - return 0; } EXPORT_SYMBOL(dev_close); @@ -3865,6 +3858,121 @@ drop: return NET_RX_DROP; } +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff xdp; + u32 act = XDP_DROP; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + if (skb_linearize(skb)) + goto do_drop; + + /* The XDP program wants to see the packet starting at the MAC + * header. + */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + __skb_push(skb, mac_len); + /* fall through */ + case XDP_PASS: + break; + + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. + */ +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} + +static struct static_key generic_xdp_needed __read_mostly; + +static int do_xdp_generic(struct sk_buff *skb) +{ + struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + int err; + + if (act != XDP_PASS) { + switch (act) { + case XDP_REDIRECT: + err = xdp_do_generic_redirect(skb->dev, skb); + if (err) + goto out_redir; + /* fallthru to submit skb */ + case XDP_TX: + generic_xdp_tx(skb, xdp_prog); + break; + } + return XDP_DROP; + } + } + return XDP_PASS; +out_redir: + trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT); + kfree_skb(skb); + return XDP_DROP; +} + static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3872,6 +3980,18 @@ static int netif_rx_internal(struct sk_buff *skb) net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); + + if (static_key_false(&generic_xdp_needed)) { + int ret = do_xdp_generic(skb); + + /* Consider XDP consuming the packet a success from + * the netdev point of view we do not want to count + * this as an error. + */ + if (ret != XDP_PASS) + return NET_RX_SUCCESS; + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4338,8 +4458,6 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static struct static_key generic_xdp_needed __read_mostly; - static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); @@ -4373,89 +4491,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) return ret; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct bpf_prog *xdp_prog) -{ - struct xdp_buff xdp; - u32 act = XDP_DROP; - void *orig_data; - int hlen, off; - u32 mac_len; - - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_cloned(skb)) - return XDP_PASS; - - if (skb_linearize(skb)) - goto do_drop; - - /* The XDP program wants to see the packet starting at the MAC - * header. - */ - mac_len = skb->data - skb_mac_header(skb); - hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data = xdp.data; - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - off = xdp.data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - - switch (act) { - case XDP_TX: - __skb_push(skb, mac_len); - /* fall through */ - case XDP_PASS: - break; - - default: - bpf_warn_invalid_xdp_action(act); - /* fall through */ - case XDP_ABORTED: - trace_xdp_exception(skb->dev, xdp_prog, act); - /* fall through */ - case XDP_DROP: - do_drop: - kfree_skb(skb); - break; - } - - return act; -} - -/* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. - */ -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - bool free_skb = true; - int cpu, rc; - - txq = netdev_pick_tx(dev, skb, NULL); - cpu = smp_processor_id(); - HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { - rc = netdev_start_xmit(skb, dev, txq, 0); - if (dev_xmit_complete(rc)) - free_skb = false; - } - HARD_TX_UNLOCK(dev, txq); - if (free_skb) { - trace_xdp_exception(dev, xdp_prog, XDP_TX); - kfree_skb(skb); - } -} - static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4468,17 +4503,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); if (static_key_false(&generic_xdp_needed)) { - struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + int ret = do_xdp_generic(skb); - if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); - - if (act != XDP_PASS) { - rcu_read_unlock(); - if (act == XDP_TX) - generic_xdp_tx(skb, xdp_prog); - return NET_RX_DROP; - } + if (ret != XDP_PASS) { + rcu_read_unlock(); + return NET_RX_DROP; } } @@ -6689,8 +6718,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) - ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); + if ((old_flags ^ flags) & IFF_UP) { + if (old_flags & IFF_UP) + __dev_close(dev); + else + ret = __dev_open(dev); + } if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -7235,24 +7268,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_GSO; } - /* UFO needs SG and checksumming */ - if (features & NETIF_F_UFO) { - /* maybe split UFO into V4 and V6? */ - if (!(features & NETIF_F_HW_CSUM) && - ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no checksum offload features.\n"); - features &= ~NETIF_F_UFO; - } - - if (!(features & NETIF_F_SG)) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); - features &= ~NETIF_F_UFO; - } - } - /* GSO partial features require GSO partial be set */ if ((features & dev->gso_partial_features) && !(features & NETIF_F_GSO_PARTIAL)) { @@ -7313,8 +7328,27 @@ sync_lower: netdev_for_each_lower_dev(dev, lower, iter) netdev_sync_lower_features(dev, lower, features); - if (!err) + if (!err) { + netdev_features_t diff = features ^ dev->features; + + if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) { + /* udp_tunnel_{get,drop}_rx_info both need + * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the + * device, or they won't do anything. + * Thus we need to update dev->features + * *before* calling udp_tunnel_get_rx_info, + * but *after* calling udp_tunnel_drop_rx_info. + */ + if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { + dev->features = features; + udp_tunnel_get_rx_info(dev); + } else { + udp_tunnel_drop_rx_info(dev); + } + } + dev->features = features; + } return err < 0 ? 0 : 1; } @@ -7516,6 +7550,12 @@ int register_netdevice(struct net_device *dev) */ dev->hw_features |= NETIF_F_SOFT_FEATURES; dev->features |= NETIF_F_SOFT_FEATURES; + + if (dev->netdev_ops->ndo_udp_tunnel_add) { + dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; + dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT; + } + dev->wanted_features = dev->features & dev->hw_features; if (!(dev->flags & IFF_LOOPBACK)) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 674b6c9cec18..6a582ae4c5d9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", @@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_HW_TC_BIT] = "hw-tc-offload", [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", + [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", }; static const char @@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; - case ETHTOOL_GUFO: - case ETHTOOL_SUFO: - return NETIF_F_UFO; case ETHTOOL_GGSO: case ETHTOOL_SGSO: return NETIF_F_GSO; @@ -2515,6 +2512,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + + if (!dev->ethtool_ops->get_fecparam) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_fecparam(dev, &fecparam); + + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam; + + if (!dev->ethtool_ops->set_fecparam) + return -EOPNOTSUPP; + + if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) + return -EFAULT; + + return dev->ethtool_ops->set_fecparam(dev, &fecparam); +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2555,7 +2579,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: @@ -2574,6 +2597,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GTUNABLE: case ETHTOOL_PHY_GTUNABLE: case ETHTOOL_GLINKSETTINGS: + case ETHTOOL_GFECPARAM: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -2723,7 +2747,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: rc = ethtool_get_one_feature(dev, useraddr, ethcmd); @@ -2732,7 +2755,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: - case ETHTOOL_SUFO: case ETHTOOL_SGSO: case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); @@ -2785,6 +2807,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_PHY_STUNABLE: rc = set_phy_tunable(dev, useraddr); break; + case ETHTOOL_GFECPARAM: + rc = ethtool_get_fecparam(dev, useraddr); + break; + case ETHTOOL_SFECPARAM: + rc = ethtool_set_fecparam(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..7e9708653c6f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -55,6 +55,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <linux/bpf_trace.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -1778,6 +1779,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { struct redirect_info { u32 ifindex; u32 flags; + struct bpf_map *map; + struct bpf_map *map_to_flush; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -1791,6 +1794,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; + ri->map = NULL; return TC_ACT_REDIRECT; } @@ -1818,6 +1822,29 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + ri->map = map; + + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_redirect_map_proto = { + .func = bpf_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -2024,8 +2051,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to - * be changed into SKB_GSO_TCPV6. + /* SKB_GSO_TCPV4 needs to be changed into + * SKB_GSO_TCPV6. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; @@ -2060,8 +2087,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to - * be changed into SKB_GSO_TCPV4. + /* SKB_GSO_TCPV6 needs to be changed into + * SKB_GSO_TCPV4. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; @@ -2412,6 +2439,140 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static int __bpf_tx_xdp(struct net_device *dev, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (!dev->netdev_ops->ndo_xdp_xmit) { + bpf_warn_invalid_xdp_redirect(dev->ifindex); + return -EOPNOTSUPP; + } + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; + + if (map) + __dev_map_insert_ctx(map, index); + else + dev->netdev_ops->ndo_xdp_flush(dev); + + return err; +} + +void xdp_do_flush_map(void) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map_to_flush; + + ri->map = NULL; + ri->map_to_flush = NULL; + + if (map) + __dev_map_flush(map); +} +EXPORT_SYMBOL_GPL(xdp_do_flush_map); + +int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map; + u32 index = ri->ifindex; + struct net_device *fwd; + int err = -EINVAL; + + ri->ifindex = 0; + ri->map = NULL; + + fwd = __dev_map_lookup_elem(map, index); + if (!fwd) + goto out; + + if (ri->map_to_flush && (ri->map_to_flush != map)) + xdp_do_flush_map(); + + err = __bpf_tx_xdp(fwd, map, xdp, index); + if (likely(!err)) + ri->map_to_flush = map; + +out: + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + return err; +} + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *fwd; + + if (ri->map) + return xdp_do_redirect_map(dev, xdp, xdp_prog); + + fwd = dev_get_by_index_rcu(dev_net(dev), ri->ifindex); + ri->ifindex = 0; + ri->map = NULL; + if (unlikely(!fwd)) { + bpf_warn_invalid_xdp_redirect(ri->ifindex); + return -EINVAL; + } + + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + + return __bpf_tx_xdp(fwd, NULL, xdp, 0); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect); + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + unsigned int len; + + dev = dev_get_by_index_rcu(dev_net(dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + bpf_warn_invalid_xdp_redirect(ri->ifindex); + goto err; + } + + if (unlikely(!(dev->flags & IFF_UP))) + goto err; + + len = dev->mtu + dev->hard_header_len + VLAN_HLEN; + if (skb->len > len) + goto err; + + skb->dev = dev; + return 0; +err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); + +BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_xdp_redirect_proto = { + .func = bpf_xdp_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -3011,6 +3172,10 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_redirect: + return &bpf_xdp_redirect_proto; + case BPF_FUNC_redirect_map: + return &bpf_redirect_map_proto; default: return bpf_base_func_proto(func_id); } @@ -3310,6 +3475,11 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +void bpf_warn_invalid_xdp_redirect(u32 ifindex) +{ + WARN_ONCE(1, "Illegal XDP redirect to unsupported device ifindex(%i)\n", ifindex); +} + static bool __is_valid_sock_ops_access(int off, int size) { if (off < 0 || off >= sizeof(struct bpf_sock_ops)) diff --git a/net/core/flow.c b/net/core/flow.c deleted file mode 100644 index f7f5d1932a27..000000000000 --- a/net/core/flow.c +++ /dev/null @@ -1,516 +0,0 @@ -/* flow.c: Generic flow cache. - * - * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) - * Copyright (C) 2003 David S. Miller (davem@redhat.com) - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/jhash.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/percpu.h> -#include <linux/bitops.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/cpumask.h> -#include <linux/mutex.h> -#include <net/flow.h> -#include <linux/atomic.h> -#include <linux/security.h> -#include <net/net_namespace.h> - -struct flow_cache_entry { - union { - struct hlist_node hlist; - struct list_head gc_list; - } u; - struct net *net; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - struct flow_cache_object *object; -}; - -struct flow_flush_info { - struct flow_cache *cache; - atomic_t cpuleft; - struct completion completion; -}; - -static struct kmem_cache *flow_cachep __read_mostly; - -#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) - -static void flow_cache_new_hashrnd(unsigned long arg) -{ - struct flow_cache *fc = (void *) arg; - int i; - - for_each_possible_cpu(i) - per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); -} - -static int flow_entry_valid(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) - return 0; - if (fle->object && !fle->object->ops->check(fle->object)) - return 0; - return 1; -} - -static void flow_entry_kill(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (fle->object) - fle->object->ops->delete(fle->object); - kmem_cache_free(flow_cachep, fle); -} - -static void flow_cache_gc_task(struct work_struct *work) -{ - struct list_head gc_list; - struct flow_cache_entry *fce, *n; - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); - - INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { - flow_entry_kill(fce, xfrm); - atomic_dec(&xfrm->flow_cache_gc_count); - } -} - -static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - unsigned int deleted, - struct list_head *gc_list, - struct netns_xfrm *xfrm) -{ - if (deleted) { - atomic_add(deleted, &xfrm->flow_cache_gc_count); - fcp->hash_count -= deleted; - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - schedule_work(&xfrm->flow_cache_gc_work); - } -} - -static void __flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - unsigned int shrink_to) -{ - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - for (i = 0; i < flow_cache_hash_size(fc); i++) { - unsigned int saved = 0; - - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (saved < shrink_to && - flow_entry_valid(fle, xfrm)) { - saved++; - } else { - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); -} - -static void flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - - __flow_cache_shrink(fc, fcp, shrink_to); -} - -static void flow_new_hash_rnd(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - get_random_bytes(&fcp->hash_rnd, sizeof(u32)); - fcp->hash_rnd_recalc = 0; - __flow_cache_shrink(fc, fcp, 0); -} - -static u32 flow_hash_code(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - const struct flowi *key, - unsigned int keysize) -{ - const u32 *k = (const u32 *) key; - const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - - return jhash2(k, length, fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1); -} - -/* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment. - */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - unsigned int keysize) -{ - const flow_compare_t *k1, *k1_lim, *k2; - - k1 = (const flow_compare_t *) key1; - k1_lim = k1 + keysize; - - k2 = (const flow_compare_t *) key2; - - do { - if (*k1++ != *k2++) - return 1; - } while (k1 < k1_lim); - - return 0; -} - -struct flow_cache_object * -flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver, void *ctx) -{ - struct flow_cache *fc = &net->xfrm.flow_cache_global; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, *tfle; - struct flow_cache_object *flo; - unsigned int keysize; - unsigned int hash; - - local_bh_disable(); - fcp = this_cpu_ptr(fc->percpu); - - fle = NULL; - flo = NULL; - - keysize = flow_key_size(family); - if (!keysize) - goto nocache; - - /* Packet really early in init? Making flow_cache_init a - * pre-smp initcall would solve this. --RR */ - if (!fcp->hash_table) - goto nocache; - - if (fcp->hash_rnd_recalc) - flow_new_hash_rnd(fc, fcp); - - hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { - if (tfle->net == net && - tfle->family == family && - tfle->dir == dir && - flow_key_compare(key, &tfle->key, keysize) == 0) { - fle = tfle; - break; - } - } - - if (unlikely(!fle)) { - if (fcp->hash_count > fc->high_watermark) - flow_cache_shrink(fc, fcp); - - if (atomic_read(&net->xfrm.flow_cache_gc_count) > - 2 * num_online_cpus() * fc->high_watermark) { - flo = ERR_PTR(-ENOBUFS); - goto ret_object; - } - - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); - if (fle) { - fle->net = net; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); - fle->object = NULL; - hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); - fcp->hash_count++; - } - } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { - flo = fle->object; - if (!flo) - goto ret_object; - flo = flo->ops->get(flo); - if (flo) - goto ret_object; - } else if (fle->object) { - flo = fle->object; - flo->ops->delete(flo); - fle->object = NULL; - } - -nocache: - flo = NULL; - if (fle) { - flo = fle->object; - fle->object = NULL; - } - flo = resolver(net, key, family, dir, flo, ctx); - if (fle) { - fle->genid = atomic_read(&net->xfrm.flow_cache_genid); - if (!IS_ERR(flo)) - fle->object = flo; - else - fle->genid--; - } else { - if (!IS_ERR_OR_NULL(flo)) - flo->ops->delete(flo); - } -ret_object: - local_bh_enable(); - return flo; -} -EXPORT_SYMBOL(flow_cache_lookup); - -static void flow_cache_flush_tasklet(unsigned long data) -{ - struct flow_flush_info *info = (void *)data; - struct flow_cache *fc = info->cache; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - fcp = this_cpu_ptr(fc->percpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle, xfrm)) - continue; - - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); - - if (atomic_dec_and_test(&info->cpuleft)) - complete(&info->completion); -} - -/* - * Return whether a cpu needs flushing. Conservatively, we assume - * the presence of any entries means the core may require flushing, - * since the flow_cache_ops.check() function may assume it's running - * on the same core as the per-cpu cache component. - */ -static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp; - unsigned int i; - - fcp = per_cpu_ptr(fc->percpu, cpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) - if (!hlist_empty(&fcp->hash_table[i])) - return 0; - return 1; -} - -static void flow_cache_flush_per_cpu(void *data) -{ - struct flow_flush_info *info = data; - struct tasklet_struct *tasklet; - - tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; - tasklet->data = (unsigned long)info; - tasklet_schedule(tasklet); -} - -void flow_cache_flush(struct net *net) -{ - struct flow_flush_info info; - cpumask_var_t mask; - int i, self; - - /* Track which cpus need flushing to avoid disturbing all cores. */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return; - cpumask_clear(mask); - - /* Don't want cpus going down or up during this. */ - get_online_cpus(); - mutex_lock(&net->xfrm.flow_flush_sem); - info.cache = &net->xfrm.flow_cache_global; - for_each_online_cpu(i) - if (!flow_cache_percpu_empty(info.cache, i)) - cpumask_set_cpu(i, mask); - atomic_set(&info.cpuleft, cpumask_weight(mask)); - if (atomic_read(&info.cpuleft) == 0) - goto done; - - init_completion(&info.completion); - - local_bh_disable(); - self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); - on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); - if (self) - flow_cache_flush_tasklet((unsigned long)&info); - local_bh_enable(); - - wait_for_completion(&info.completion); - -done: - mutex_unlock(&net->xfrm.flow_flush_sem); - put_online_cpus(); - free_cpumask_var(mask); -} - -static void flow_cache_flush_task(struct work_struct *work) -{ - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_flush_work); - struct net *net = container_of(xfrm, struct net, xfrm); - - flow_cache_flush(net); -} - -void flow_cache_flush_deferred(struct net *net) -{ - schedule_work(&net->xfrm.flow_cache_flush_work); -} - -static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); - - if (!fcp->hash_table) { - fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); - if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %u\n", sz); - return -ENOMEM; - } - fcp->hash_rnd_recalc = 1; - fcp->hash_count = 0; - tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); - } - return 0; -} - -static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - - return flow_cache_cpu_prepare(fc, cpu); -} - -static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - - __flow_cache_shrink(fc, fcp, 0); - return 0; -} - -int flow_cache_init(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - if (!flow_cachep) - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); - spin_lock_init(&net->xfrm.flow_cache_gc_lock); - INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); - INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); - INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); - mutex_init(&net->xfrm.flow_flush_sem); - atomic_set(&net->xfrm.flow_cache_gc_count, 0); - - fc->hash_shift = 10; - fc->low_watermark = 2 * flow_cache_hash_size(fc); - fc->high_watermark = 4 * flow_cache_hash_size(fc); - - fc->percpu = alloc_percpu(struct flow_cache_percpu); - if (!fc->percpu) - return -ENOMEM; - - if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) - goto err; - - setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, - (unsigned long) fc); - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); - - return 0; - -err: - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; - - return -ENOMEM; -} -EXPORT_SYMBOL(flow_cache_init); - -void flow_cache_fini(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - del_timer_sync(&fc->rnd_timer); - - cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); - - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; -} -EXPORT_SYMBOL(flow_cache_fini); - -void __init flow_cache_hp_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, - "net/flow:prepare", - flow_cache_cpu_up_prep, - flow_cache_cpu_dead); - WARN_ON(ret < 0); -} diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..c27da51d14e4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -158,31 +158,6 @@ out: * */ -struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) -{ - struct sk_buff *skb; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(skbuff_head_cache, - gfp_mask & ~__GFP_DMA, node); - if (!skb) - goto out; - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! - */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->head = NULL; - skb->truesize = sizeof(struct sk_buff); - refcount_set(&skb->users, 1); - - skb->mac_header = (typeof(skb->mac_header))~0U; -out: - return skb; -} - /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -762,8 +737,7 @@ void consume_stateless_skb(struct sk_buff *skb) return; trace_consume_skb(skb); - if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb); kfree_skbmem(skb); } @@ -1719,6 +1693,8 @@ pull_pages: if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + if (!i) + goto end; eat = 0; } k++; @@ -1726,6 +1702,7 @@ pull_pages: } skb_shinfo(skb)->nr_frags = k; +end: skb->tail += delta; skb->data_len -= delta; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 416ac4ef9ba9..a55e2e4087a4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -220,6 +220,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP +static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +{ + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; +} + int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76c2077c3f5b..5ce44fb7d498 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1219,10 +1219,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, gso_partial, encap; + bool fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; @@ -1257,7 +1256,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { - udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1277,13 +1275,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (udpfrag) { - iph->frag_off = htons(offset >> 3); - if (skb->next) - iph->frag_off |= htons(IP_MF); - offset += skb->len - nhoff - ihl; - tot_len = skb->len - nhoff; - } else if (skb_is_gso(skb)) { + if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d5cac99170b1..416bb304a281 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo, gso_partial; + bool need_csum, gso_partial; if (!skb->encapsulation) goto out; @@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); skb->encap_hdr_csum = need_csum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - features &= skb->dev->hw_enc_features; - /* The only checksum offload we care about from here on out is the - * outer one so strip the existing checksum feature flags based - * on the fact that we will be computing our checksum in software. - */ - if (ufo) { - features &= ~NETIF_F_CSUM_MASK; - if (!need_csum) - features |= NETIF_F_HW_CSUM; - } - /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c5a117cc6619..337ad41bb80a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -33,7 +33,7 @@ * also be removed if the pool is overloaded i.e. if the total amount of * entries is greater-or-equal than the threshold. * - * Node pool is organised as an AVL tree. + * Node pool is organised as an RB tree. * Such an implementation has been chosen not just for fun. It's a way to * prevent easy and efficient DoS attacks by creating hash collisions. A huge * amount of long living nodes in a single hash slot would significantly delay @@ -45,7 +45,7 @@ * AND reference count being 0. * 3. Global variable peer_total is modified under the pool lock. * 4. struct inet_peer fields modification: - * avl_left, avl_right, avl_parent, avl_height: pool lock + * rb_node: pool lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable @@ -53,30 +53,15 @@ static struct kmem_cache *peer_cachep __read_mostly; -static LIST_HEAD(gc_list); -static const int gc_delay = 60 * HZ; -static struct delayed_work gc_work; -static DEFINE_SPINLOCK(gc_lock); - -#define node_height(x) x->avl_height - -#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) -#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) -static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty_rcu, - .avl_right = peer_avl_empty_rcu, - .avl_height = 0 -}; - void inet_peer_base_init(struct inet_peer_base *bp) { - bp->root = peer_avl_empty_rcu; + bp->rb_root = RB_ROOT; seqlock_init(&bp->lock); bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); -#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ +#define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more @@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -static void inetpeer_gc_worker(struct work_struct *work) -{ - struct inet_peer *p, *n, *c; - struct list_head list; - - spin_lock_bh(&gc_lock); - list_replace_init(&gc_list, &list); - spin_unlock_bh(&gc_lock); - - if (list_empty(&list)) - return; - - list_for_each_entry_safe(p, n, &list, gc_list) { - - if (need_resched()) - cond_resched(); - - c = rcu_dereference_protected(p->avl_left, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_left = peer_avl_empty_rcu; - } - - c = rcu_dereference_protected(p->avl_right, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_right = peer_avl_empty_rcu; - } - - n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - - if (refcount_read(&p->refcnt) == 1) { - list_del(&p->gc_list); - kmem_cache_free(peer_cachep, p); - } - } - - if (list_empty(&list)) - return; - - spin_lock_bh(&gc_lock); - list_splice(&list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -153,225 +91,62 @@ void __init inet_initpeers(void) sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - - INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } -#define rcu_deref_locked(X, BASE) \ - rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) - -/* - * Called with local BH disabled and the pool lock held. - */ -#define lookup(_daddr, _stack, _base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - \ - stackptr = _stack; \ - *stackptr++ = &_base->root; \ - for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty;) { \ - int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \ - if (cmp == 0) \ - break; \ - if (cmp == -1) \ - v = &u->avl_left; \ - else \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, _base); \ - } \ - u; \ -}) - -/* - * Called with rcu_read_lock() - * Because we hold no lock against a writer, its quite possible we fall - * in an endless loop. - * But every pointer we follow is guaranteed to be valid thanks to RCU. - * We exit from this function if number of links exceeds PEER_MAXDEPTH - */ -static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +/* Called with rcu_read_lock() or base->lock held */ +static struct inet_peer *lookup(const struct inetpeer_addr *daddr, + struct inet_peer_base *base, + unsigned int seq, + struct inet_peer *gc_stack[], + unsigned int *gc_cnt, + struct rb_node **parent_p, + struct rb_node ***pp_p) { - struct inet_peer *u = rcu_dereference(base->root); - int count = 0; + struct rb_node **pp, *parent; + struct inet_peer *p; + + pp = &base->rb_root.rb_node; + parent = NULL; + while (*pp) { + int cmp; - while (u != peer_avl_empty) { - int cmp = inetpeer_addr_cmp(daddr, &u->daddr); + parent = rcu_dereference_raw(*pp); + p = rb_entry(parent, struct inet_peer, rb_node); + cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - /* Before taking a reference, check if this entry was - * deleted (refcnt=0) - */ - if (!refcount_inc_not_zero(&u->refcnt)) { - u = NULL; - } - return u; + if (!refcount_inc_not_zero(&p->refcnt)) + break; + return p; + } + if (gc_stack) { + if (*gc_cnt < PEER_MAX_GC) + gc_stack[(*gc_cnt)++] = p; + } else if (unlikely(read_seqretry(&base->lock, seq))) { + break; } if (cmp == -1) - u = rcu_dereference(u->avl_left); + pp = &(*pp)->rb_left; else - u = rcu_dereference(u->avl_right); - if (unlikely(++count == PEER_MAXDEPTH)) - break; + pp = &(*pp)->rb_right; } + *parent_p = parent; + *pp_p = pp; return NULL; } -/* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start, base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - *stackptr++ = &start->avl_left; \ - v = &start->avl_left; \ - for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu;) { \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, base); \ - } \ - u; \ -}) - -/* Called with local BH disabled and the pool lock held. - * Variable names are the proof of operation correctness. - * Look into mm/map_avl.c for more detail description of the ideas. - */ -static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend, - struct inet_peer_base *base) -{ - struct inet_peer __rcu **nodep; - struct inet_peer *node, *l, *r; - int lh, rh; - - while (stackend > stack) { - nodep = *--stackend; - node = rcu_deref_locked(*nodep, base); - l = rcu_deref_locked(node->avl_left, base); - r = rcu_deref_locked(node->avl_right, base); - lh = node_height(l); - rh = node_height(r); - if (lh > rh + 1) { /* l: RH+2 */ - struct inet_peer *ll, *lr, *lrl, *lrr; - int lrh; - ll = rcu_deref_locked(l->avl_left, base); - lr = rcu_deref_locked(l->avl_right, base); - lrh = node_height(lr); - if (lrh <= node_height(ll)) { /* ll: RH+1 */ - RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ - RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ - l->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, l); - } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ - lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = rh + 1; /* node: RH+1 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ - RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ - l->avl_height = rh + 1; /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ - lr->avl_height = rh + 2; - RCU_INIT_POINTER(*nodep, lr); - } - } else if (rh > lh + 1) { /* r: LH+2 */ - struct inet_peer *rr, *rl, *rlr, *rll; - int rlh; - rr = rcu_deref_locked(r->avl_right, base); - rl = rcu_deref_locked(r->avl_left, base); - rlh = node_height(rl); - if (rlh <= node_height(rr)) { /* rr: LH+1 */ - RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ - RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ - r->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, r); - } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ - rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = lh + 1; /* node: LH+1 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ - RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ - r->avl_height = lh + 1; /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ - rl->avl_height = lh + 2; - RCU_INIT_POINTER(*nodep, rl); - } - } else { - node->avl_height = (lh > rh ? lh : rh) + 1; - } - } -} - -/* Called with local BH disabled and the pool lock held. */ -#define link_to_pool(n, base) \ -do { \ - n->avl_height = 1; \ - n->avl_left = peer_avl_empty_rcu; \ - n->avl_right = peer_avl_empty_rcu; \ - /* lockless readers can catch us now */ \ - rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr, base); \ -} while (0) - static void inetpeer_free_rcu(struct rcu_head *head) { kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer __rcu ***stackptr, ***delp; - - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. */ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - call_rcu(&p->rcu, inetpeer_free_rcu); -} - /* perform garbage collect on all items stacked during a lookup */ -static int inet_peer_gc(struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH], - struct inet_peer __rcu ***stackptr) +static void inet_peer_gc(struct inet_peer_base *base, + struct inet_peer *gc_stack[], + unsigned int gc_cnt) { - struct inet_peer *p, *gchead = NULL; + struct inet_peer *p; __u32 delta, ttl; - int cnt = 0; + int i; if (base->total >= inet_peer_threshold) ttl = 0; /* be aggressive */ @@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base, ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * base->total / inet_peer_threshold * HZ; - stackptr--; /* last stack slot is peer_avl_empty */ - while (stackptr > stack) { - stackptr--; - p = rcu_deref_locked(**stackptr, base); - if (refcount_read(&p->refcnt) == 1) { - smp_rmb(); - delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { - p->gc_next = gchead; - gchead = p; - } - } + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + delta = (__u32)jiffies - p->dtime; + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) + gc_stack[i] = NULL; } - while ((p = gchead) != NULL) { - gchead = p->gc_next; - cnt++; - unlink_from_pool(p, base, stack); + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + if (p) { + rb_erase(&p->rb_node, &base->rb_root); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); + } } - return cnt; } struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer *p; - unsigned int sequence; - int invalidated, gccnt = 0; + struct inet_peer *p, *gc_stack[PEER_MAX_GC]; + struct rb_node **pp, *parent; + unsigned int gc_cnt, seq; + int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); - sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base); - invalidated = read_seqretry(&base->lock, sequence); + seq = read_seqbegin(&base->lock); + p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); + invalidated = read_seqretry(&base->lock, seq); rcu_read_unlock(); if (p) @@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ + parent = NULL; write_seqlock_bh(&base->lock); -relookup: - p = lookup(daddr, stack, base); - if (p != peer_avl_empty) { - refcount_inc(&p->refcnt); - write_sequnlock_bh(&base->lock); - return p; - } - if (!gccnt) { - gccnt = inet_peer_gc(base, stack, stackptr); - if (gccnt && create) - goto relookup; - } - p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; - if (p) { - p->daddr = *daddr; - refcount_set(&p->refcnt, 2); - atomic_set(&p->rid, 0); - p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; - p->rate_tokens = 0; - /* 60*HZ is arbitrary, but chosen enough high so that the first - * calculation of tokens is at its maximum. - */ - p->rate_last = jiffies - 60*HZ; - INIT_LIST_HEAD(&p->gc_list); - /* Link the node. */ - link_to_pool(p, base); - base->total++; + gc_cnt = 0; + p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); + if (!p && create) { + p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); + if (p) { + p->daddr = *daddr; + refcount_set(&p->refcnt, 2); + atomic_set(&p->rid, 0); + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; + + rb_link_node(&p->rb_node, parent, pp); + rb_insert_color(&p->rb_node, &base->rb_root); + base->total++; + } } + if (gc_cnt) + inet_peer_gc(base, gc_stack, gc_cnt); write_sequnlock_bh(&base->lock); return p; @@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; - smp_mb__before_atomic(); - refcount_dec(&p->refcnt); + + if (refcount_dec_and_test(&p->refcnt)) + call_rcu(&p->rcu, inetpeer_free_rcu); } EXPORT_SYMBOL_GPL(inet_putpeer); @@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); -static void inetpeer_inval_rcu(struct rcu_head *head) -{ - struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu); - - spin_lock_bh(&gc_lock); - list_add_tail(&p->gc_list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *root; - - write_seqlock_bh(&base->lock); + struct inet_peer *p, *n; - root = rcu_deref_locked(base->root, base); - if (root != peer_avl_empty) { - base->root = peer_avl_empty_rcu; - base->total = 0; - call_rcu(&root->gc_rcu, inetpeer_inval_rcu); + rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { + inet_putpeer(p); + cond_resched(); } - write_sequnlock_bh(&base->lock); + base->rb_root = RB_ROOT; + base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50c74cd890bc..b631ec685d77 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy) return csum; } -static inline int ip_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int maxfraglen, unsigned int flags) -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP fragmentation offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_reset_network_header(skb); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, @@ -965,18 +910,6 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { - err = ip_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, transhdrlen, - maxfraglen, flags); - if (err) - goto error; - return 0; - } /* So, what's going on in the loop below? * @@ -1287,15 +1220,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - if ((size + skb->len > mtu) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { - if (skb->ip_summed != CHECKSUM_PARTIAL) - return -EOPNOTSUPP; - - skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - } cork->length += size; while (size > 0) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0192c255e508..5ed63d250950 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; -static bool is_vti_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti_netdev_ops; -} - -static int vti_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip_tunnel *tunnel = netdev_priv(dev); - - if (!is_vti_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(tunnel->net, dev_net(dev))) - xfrm_garbage_collect(tunnel->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti_notifier_block __read_mostly = { - .notifier_call = vti_device_event, -}; - static int __init vti_init(void) { const char *msg; @@ -618,8 +591,6 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); - register_netdevice_notifier(&vti_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -652,7 +623,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -664,7 +634,6 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); - unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 43eb6567b3a0..b6d3fe03feb3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -206,14 +206,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS), SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS), - SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED), - SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG), - SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE), - SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED), - SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS), - SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER), SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS), - SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS), SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), @@ -230,14 +223,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES), SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS), - SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), - SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED), SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT), SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9bf809726066..0d3c038d7b04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,6 +45,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +/* obsolete */ +static int sysctl_tcp_low_latency __read_mostly; + /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71ce33decd97..5326b50a3450 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) return period; } +static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp) +{ + u32 rate = READ_ONCE(tp->rate_delivered); + u32 intv = READ_ONCE(tp->rate_interval_us); + u64 rate64 = 0; + + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + } + return rate64; +} + /* Address-family independent initialization for a tcp_sock. * * NOTE: A lot of things set to zero explicitly by call to @@ -400,7 +413,6 @@ void tcp_init_sock(struct sock *sk) tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; @@ -1525,20 +1537,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } -static void tcp_prequeue_process(struct sock *sk) -{ - struct sk_buff *skb; - struct tcp_sock *tp = tcp_sk(sk); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - /* Clear memory counter. */ - tp->ucopy.memory = 0; -} - static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1671,7 +1669,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int err; int target; /* Read at least this many bytes */ long timeo; - struct task_struct *user_recv = NULL; struct sk_buff *skb, *last; u32 urg_hole = 0; @@ -1806,51 +1803,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_cleanup_rbuf(sk, copied); - if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { - /* Install new reader */ - if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { - user_recv = current; - tp->ucopy.task = user_recv; - tp->ucopy.msg = msg; - } - - tp->ucopy.len = len; - - WARN_ON(tp->copied_seq != tp->rcv_nxt && - !(flags & (MSG_PEEK | MSG_TRUNC))); - - /* Ugly... If prequeue is not empty, we have to - * process it before releasing socket, otherwise - * order will be broken at second iteration. - * More elegant solution is required!!! - * - * Look: we have the following (pseudo)queues: - * - * 1. packets in flight - * 2. backlog - * 3. prequeue - * 4. receive_queue - * - * Each queue can be processed only if the next ones - * are empty. At this point we have empty receive_queue. - * But prequeue _can_ be not empty after 2nd iteration, - * when we jumped to start of loop because backlog - * processing added something to receive_queue. - * We cannot release_sock(), because backlog contains - * packets arrived _after_ prequeued ones. - * - * Shortly, algorithm is clear --- to process all - * the queues in order. We could make it more directly, - * requeueing packets from backlog to prequeue, if - * is not empty. It is more elegant, but eats cycles, - * unfortunately. - */ - if (!skb_queue_empty(&tp->ucopy.prequeue)) - goto do_prequeue; - - /* __ Set realtime policy in scheduler __ */ - } - if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1859,31 +1811,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, sk_wait_data(sk, &timeo, last); } - if (user_recv) { - int chunk; - - /* __ Restore normal policy in scheduler __ */ - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); - len -= chunk; - copied += chunk; - } - - if (tp->rcv_nxt == tp->copied_seq && - !skb_queue_empty(&tp->ucopy.prequeue)) { -do_prequeue: - tcp_prequeue_process(sk); - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", @@ -1934,10 +1861,8 @@ do_prequeue: tcp_rcv_space_adjust(sk); skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) tp->urg_data = 0; - tcp_fast_path_check(sk); - } if (used + offset < skb->len) continue; @@ -1955,25 +1880,6 @@ skip_copy: break; } while (len > 0); - if (user_recv) { - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - int chunk; - - tp->ucopy.len = copied > 0 ? len : 0; - - tcp_prequeue_process(sk); - - if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - } - /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ @@ -2823,7 +2729,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now, intv; + u32 now; u64 rate64; bool slow; u32 rate; @@ -2922,13 +2828,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; - rate = READ_ONCE(tp->rate_delivered); - intv = READ_ONCE(tp->rate_interval_us); - if (rate && intv) { - rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; - do_div(rate64, intv); + rate64 = tcp_compute_delivery_rate(tp); + if (rate64) info->tcpi_delivery_rate = rate64; - } unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2938,8 +2840,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; struct tcp_info info; + u64 rate64; + u32 rate; - stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + + 3 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -2954,6 +2860,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) tp->data_segs_out, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, tp->total_retrans, TCP_NLA_PAD); + + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD); + + rate64 = tcp_compute_delivery_rate(tp); + nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); + + nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd); + nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); + nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); + + nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2920e0cb09f8..af0a98d54b62 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -103,7 +103,6 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ -#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ @@ -3367,12 +3366,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin; - /* Note, it is the only place, where - * fast path is recovered for sending TCP. - */ - tp->pred_flags = 0; - tcp_fast_path_check(sk); - if (tcp_send_head(sk)) tcp_slow_start_after_idle_check(sk); @@ -3554,6 +3547,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 ack_ev_flags = 0; sack_state.first_sackt = 0; sack_state.rate = &rs; @@ -3597,42 +3591,26 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (flag & FLAG_UPDATE_TS_RECENT) tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { - /* Window is constant, pure forward advance. - * No more checks are required. - * Note, we use the fact that SND.UNA>=SND.WL2. - */ - tcp_update_wl(tp, ack_seq); - tcp_snd_una_update(tp, ack); - flag |= FLAG_WIN_UPDATE; - - tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); - } else { - u32 ack_ev_flags = CA_ACK_SLOWPATH; - - if (ack_seq != TCP_SKB_CB(skb)->end_seq) - flag |= FLAG_DATA; - else - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + if (ack_seq != TCP_SKB_CB(skb)->end_seq) + flag |= FLAG_DATA; + else + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); - flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); + flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); - if (TCP_SKB_CB(skb)->sacked) - flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_state); + if (TCP_SKB_CB(skb)->sacked) + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, + &sack_state); - if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { - flag |= FLAG_ECE; - ack_ev_flags |= CA_ACK_ECE; - } + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + flag |= FLAG_ECE; + ack_ev_flags = CA_ACK_ECE; + } - if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE; - tcp_in_ack_event(sk, ack_ev_flags); - } + tcp_in_ack_event(sk, ack_ev_flags); /* We passed data and got it acked, remove any soft error * log. Something worked... @@ -4398,8 +4376,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } - /* Disable header prediction. */ - tp->pred_flags = 0; inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); @@ -4611,32 +4587,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) goto out_of_window; /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); + if (eaten < 0) { + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; } + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@ -4656,8 +4614,6 @@ queue_and_out: if (tp->rx_opt.num_sacks) tcp_sack_remove(tp); - tcp_fast_path_check(sk); - if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) @@ -4983,7 +4939,6 @@ static int tcp_prune_queue(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ - tp->pred_flags = 0; return -1; } @@ -5155,9 +5110,6 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) tp->urg_data = TCP_URG_NOTYET; tp->urg_seq = ptr; - - /* Disable header prediction. */ - tp->pred_flags = 0; } /* This is the 'fast' part of urgent handling. */ @@ -5186,26 +5138,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t } } -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; -} - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@ -5336,201 +5268,29 @@ discard: /* * TCP receive function for the ESTABLISHED state. - * - * It is split into a fast path and a slow path. The fast path is - * disabled when: - * - A zero window was announced from us - zero window probing - * is only handled properly in the slow path. - * - Out of order segments arrived. - * - Urgent data is expected. - * - There is no buffer space left - * - Unexpected TCP flags/window values/header lengths are received - * (detected by checking the TCP header against pred_flags) - * - Data is sent in both directions. Fast path only supports pure senders - * or pure receivers (this means either the sequence number or the ack - * value must stay constant) - * - Unexpected TCP option. - * - * When these conditions are not satisfied it drops into a standard - * receive procedure patterned after RFC793 to handle all cases. - * The first three cases are guaranteed by proper pred_flags setting, - * the rest is checked inline. Fast processing is turned on in - * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); - /* - * Header prediction. - * The code loosely follows the one in the famous - * "30 instruction TCP receive" Van Jacobson mail. - * - * Van's trick is to deposit buffers into socket queue - * on a device interrupt, to call tcp_recv function - * on the receive process context and checksum and copy - * the buffer to user space. smart... - * - * Our current scheme is not silly either but we take the - * extra cost of the net_bh soft interrupt processing... - * We do checksum and copy also but from device to kernel. - */ tp->rx_opt.saw_tstamp = 0; - /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_prediction is to be made - * 'S' will always be tp->tcp_header_len >> 2 - * '?' will be 0 for the fast path, otherwise pred_flags is 0 to - * turn it off (when there are holes in the receive - * space for instance) - * PSH flag is ignored. - */ - - if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt && - !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { - int tcp_header_len = tp->tcp_header_len; - - /* Timestamp header prediction: tcp_header_len - * is automatically equal to th->doff*4 due to pred_flags - * match. - */ - - /* Check timestamp */ - if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - /* No? Slow path! */ - if (!tcp_parse_aligned_timestamp(tp, th)) - goto slow_path; - - /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) - goto slow_path; - - /* DO NOT update ts_recent here, if checksum fails - * and timestamp was corrupted part, it will result - * in a hung connection since we will drop all - * future packets due to the PAWS test. - */ - } - - if (len <= tcp_header_len) { - /* Bulk data transfer: sender */ - if (len == tcp_header_len) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - /* We know that such packets are checksummed - * on entry. - */ - tcp_ack(sk, skb, 0); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return; - } else { /* Header too small */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - goto discard; - } - } else { - int eaten = 0; - bool fragstolen = false; - - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; - - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } - - tcp_event_data_recv(sk, skb); - - if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { - /* Well, only one small jumplet in fast path... */ - tcp_ack(sk, skb, FLAG_DATA); - tcp_data_snd_check(sk); - if (!inet_csk_ack_scheduled(sk)) - goto no_ack; - } - - __tcp_ack_snd_check(sk, 0); -no_ack: - if (eaten) - kfree_skb_partial(skb, fragstolen); - sk->sk_data_ready(sk); - return; - } - } - -slow_path: if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) goto discard; - /* - * Standard slow path. - */ - if (!tcp_validate_incoming(sk, skb, th, 1)) return; -step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0) goto discard; tcp_rcv_rtt_measure_ts(sk, skb); @@ -5584,11 +5344,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); - if (!tp->rx_opt.snd_wscale) - __tcp_fast_path_on(tp, tp->snd_wnd); - else - tp->pred_flags = 0; - + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + } } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5717,7 +5476,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_ecn_rcv_synack(tp, th); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_ack(sk, skb, FLAG_SLOWPATH); + tcp_ack(sk, skb, 0); /* Ok.. it's good. Set up sequence numbers and * move to established. @@ -5953,8 +5712,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; /* step 5: check the ACK field */ - acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT | + + acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT | FLAG_NO_CHALLENGE_ACK) > 0; if (!acceptable) { @@ -6022,7 +5781,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a20e7f03d5f7..9b51663cd5a4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,8 +85,6 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_low_latency __read_mostly; - #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); @@ -1458,7 +1456,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); return 0; } @@ -1541,61 +1539,6 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - */ -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - /* Before escaping RCU protected region, we need to take care of skb - * dst. Prequeue is only enabled for established sockets. - * For such sockets, we might need the skb dst only to set sk->sk_rx_dst - * Instead of doing full sk_rx_dst validity here, let's perform - * an optimistic check. - */ - if (likely(sk->sk_rx_dst)) - skb_dst_drop(skb); - else - skb_dst_force_safe(skb); - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (skb_queue_len(&tp->ucopy.prequeue) >= 32 || - tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED, - skb_queue_len(&tp->ucopy.prequeue)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb1); - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} -EXPORT_SYMBOL(tcp_prequeue); - bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; @@ -1770,8 +1713,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); + ret = tcp_v4_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } @@ -1936,9 +1878,6 @@ void tcp_v4_destroy_sock(struct sock *sk) } #endif - /* Clean prequeue, it must be empty really */ - __skb_queue_purge(&tp->ucopy.prequeue); - /* Clean up a referenced TCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ff83c1637d8..1537b87c657f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -436,8 +436,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_sock *newtp = tcp_sk(newsk); /* Now setup tcp_sock */ - newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->segs_in = 1; @@ -445,7 +443,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; - tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); tcp_init_wl(newtp, treq->rcv_isn); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2f1588bf73da..d49bff51bdb7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -295,9 +295,7 @@ static u16 tcp_select_window(struct sock *sk) /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; - /* If we advertise zero window, disable fast path. */ if (new_win == 0) { - tp->pred_flags = 0; if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); @@ -2378,7 +2376,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, tlp_time_stamp, rto_time_stamp; - u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); /* No consecutive loss probes. */ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { @@ -2407,15 +2404,19 @@ bool tcp_schedule_loss_probe(struct sock *sk) tcp_send_head(sk)) return false; - /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + /* Probe timeout is 2*rtt. Add minimum RTO to account * for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; - if (tp->packets_out == 1) - timeout = max_t(u32, timeout, - (rtt + (rtt >> 1) + TCP_DELACK_MAX)); - timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + if (tp->srtt_us) { + timeout = usecs_to_jiffies(tp->srtt_us >> 2); + if (tp->packets_out == 1) + timeout += TCP_RTO_MIN; + else + timeout += TCP_TIMEOUT_MIN; + } else { + timeout = TCP_TIMEOUT_INIT; + } /* If RTO is shorter, just schedule TLP in its place. */ tlp_time_stamp = tcp_jiffies32 + timeout; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..697f4c67b2e3 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, BUG(); } - p->length = skb->len; + p->length = len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fe9a493d0208..449cd914d58e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk) tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { - timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); + timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c0feeeef962a..f753f9d2fee3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk) /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); sk_mem_reclaim_partial(sk); @@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk) } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - struct sk_buff *skb; - - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - tp->ucopy.memory = 0; - } - if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index bec9cafbe3f9..e5de84310949 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -154,24 +154,6 @@ static inline void update_rtt_min(struct westwood *w) } /* - * @westwood_fast_bw - * It is called when we are in fast path. In particular it is called when - * header prediction is successful. In such case in fact update is - * straight forward and doesn't need any particular care. - */ -static inline void westwood_fast_bw(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct westwood *w = inet_csk_ca(sk); - - westwood_update_window(sk); - - w->bk += tp->snd_una - w->snd_una; - w->snd_una = tp->snd_una; - update_rtt_min(w); -} - -/* * @westwood_acked_count * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. @@ -223,17 +205,12 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) static void tcp_westwood_ack(struct sock *sk, u32 ack_flags) { - if (ack_flags & CA_ACK_SLOWPATH) { - struct westwood *w = inet_csk_ca(sk); - - westwood_update_window(sk); - w->bk += westwood_acked_count(sk); + struct westwood *w = inet_csk_ca(sk); - update_rtt_min(w); - return; - } + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); - westwood_fast_bw(sk); + update_rtt_min(w); } static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 781250151d40..97658bfc1b58 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo, gso_partial; + bool remcsum, need_csum, offload_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && @@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * outer one so strip the existing checksum feature flags and * instead set the flag based on our outer checksum offload value. */ - if (remcsum || ufo) { + if (remcsum) { features &= ~NETIF_F_CSUM_MASK; if (!need_csum || offload_csum) features |= NETIF_F_HW_CSUM; @@ -189,66 +187,16 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - __wsum csum; - struct udphdr *uh; - struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) segs = skb_udp_tunnel_segment(skb, features, false); - goto out; - } - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - - uh = udp_hdr(skb); - iph = ip_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_NONE; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - segs = skb_segment(skb, features); -out: return segs; } @@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_ufo_fragment, + .gso_segment = udp4_tunnel_segment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 58bd39fb14b4..6539ff15e9a3 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, struct sock *sk = sock->sk; struct udp_tunnel_info ti; - if (!dev->netdev_ops->ndo_udp_tunnel_add) + if (!dev->netdev_ops->ndo_udp_tunnel_add || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; ti.type = type; @@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type) +{ + struct sock *sk = sock->sk; + struct udp_tunnel_info ti; + + if (!dev->netdev_ops->ndo_udp_tunnel_del || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); +} +EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); + /* Notify netdevs that UDP port started listening */ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) { @@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_add) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); } rcu_read_unlock(); @@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_del) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); } rcu_read_unlock(); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 71b4ecc195c7..4aefb149fe0a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_tos = iph->tos; } -static inline int xfrm4_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - - xfrm_garbage_collect_deferred(net); - return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, - .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b5b5b7955..0a7c74049a0c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -210,7 +210,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); + np->autoflowlabel = ip6_default_np_autolabel(net); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 162efba0d0cd..43ca864327c7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -static inline int ip6_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int exthdrlen, int transhdrlen, int mtu, - unsigned int flags, const struct flowi6 *fl6) - -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP large send offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_set_network_header(skb, exthdrlen); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->protocol = htons(ETH_P_IPV6); - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), - &fl6->daddr, - &fl6->saddr); - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1381,19 +1318,6 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? skb->len : headersize)) > mtu) || - (skb && skb_is_gso(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { - err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, exthdrlen, - transhdrlen, mtu, flags, fl6); - if (err) - goto error; - return 0; - } - if (!skb) goto alloc_new_skb; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 486c2305f53c..79444a4bfd6d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -static bool is_vti6_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti6_netdev_ops; -} - -static int vti6_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip6_tnl *t = netdev_priv(dev); - - if (!is_vti6_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(t->net, dev_net(dev))) - xfrm_garbage_collect(t->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti6_notifier_block __read_mostly = { - .notifier_call = vti6_device_event, -}; - /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; - register_netdevice_notifier(&vti6_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); - unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2521690d62d6..ced5dcf37465 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1296,7 +1296,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); if (opt_skb) goto ipv6_pktoptions; return 0; @@ -1505,8 +1505,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v6_do_rcv(sk, skb); + ret = tcp_v6_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a2267f80febb..455fd4e39333 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,109 +17,15 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *packet_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - __wsum csum; - int tnl_hlen; - int err; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - /* Set the IPv6 fragment id if not set yet */ - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - - segs = NULL; - goto out; - } if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); - else { - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - - uh = udp_hdr(skb); - ipv6h = ipv6_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v6_check(skb->len, &ipv6h->saddr, - &ipv6h->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_NONE; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Check if there is enough headroom to insert fragment header. */ - tnl_hlen = skb_tnl_header_len(skb); - if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { - if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) - goto out; - } - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - err = ip6_find_1stfragopt(skb, &prevhdr); - if (err < 0) - return ERR_PTR(err); - unfrag_ip6hlen = err; - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + - unfrag_ip6hlen + tnl_hlen; - packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; - memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); - - SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - } -out: return segs; } @@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_ufo_fragment, + .gso_segment = udp6_tunnel_segment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 79651bc71bf0..f44b25a48478 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - - xfrm_garbage_collect_deferred(net); - return dst_entries_get_fast(ops) > ops->gc_thresh * 2; -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, - .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..10d7133e4fe9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); - if (err == 0) - xfrm_garbage_collect(net); return err; } @@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); - if (!err) - xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 3f6c4fa78bdb..245fa350a7a8 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -static const struct file_operations recent_old_fops, recent_mt_fops; +static const struct file_operations recent_mt_fops; #endif static u_int32_t hash_rnd __read_mostly; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..f6e229b51dfb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; - struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; - if (gso_type & SKB_GSO_UDP) { - /* The initial flow key extracted by ovs_flow_key_extract() - * in this case is for a first fragment, so we need to - * properly mark later fragments. - */ - later_key = *key; - later_key.ip.frag = OVS_FRAG_TYPE_LATER; - } - /* Queue all of the segments. */ skb = segs; do { - if (gso_type & SKB_GSO_UDP && skb != segs) - key = &later_key; - err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 3f76cb765e5b..8c94cef25a72 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, const struct sk_buff *skb) { struct flow_stats *stats; - int node = numa_node_id(); - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[cpu]); @@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC, - node); + numa_node_id()); if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; @@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, rcu_assign_pointer(flow->stats[cpu], new_stats); + cpumask_set_cpu(cpu, &flow->cpu_used_mask); goto unlock; } } @@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow, memset(ovs_stats, 0, sizeof(*ovs_stats)); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { @@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu; /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { @@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF) || - skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + if (nh->frag_off & htons(IP_MF)) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.frag = OVS_FRAG_TYPE_FIRST; - /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a9bc1c875965..1875bba4f865 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -31,6 +31,7 @@ #include <linux/jiffies.h> #include <linux/time.h> #include <linux/flex_array.h> +#include <linux/cpumask.h> #include <net/inet_ecn.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> @@ -219,6 +220,7 @@ struct sw_flow { */ struct sw_flow_key key; struct sw_flow_id id; + struct cpumask cpu_used_mask; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each CPU. First one diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index ea7a8073fa02..80ea2a71852e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); + cpumask_set_cpu(0, &flow->cpu_used_mask); + return flow; err: kmem_cache_free(flow_cache, flow); @@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow) if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[cpu]); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0615c2a950fa..5a178047a7ce 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -#define PGV_FROM_VMALLOC 1 - #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) diff --git a/net/rds/connection.c b/net/rds/connection.c index 50a3789ac23e..005bca68aa94 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (!cp->cp_transport_data) return; - rds_conn_path_drop(cp); - flush_work(&cp->cp_down_w); - /* make sure lingering queued work won't try to ref the conn */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + rds_conn_path_drop(cp, true); + flush_work(&cp->cp_down_w); + /* tear down queued messages */ list_for_each_entry_safe(rm, rtmp, &cp->cp_send_queue, @@ -664,9 +664,13 @@ void rds_conn_exit(void) /* * Force a disconnect */ -void rds_conn_path_drop(struct rds_conn_path *cp) +void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); + + if (!destroy && cp->cp_conn->c_destroy_in_prog) + return; + queue_work(rds_wq, &cp->cp_down_w); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop); void rds_conn_drop(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); - rds_conn_path_drop(&conn->c_path[0]); + rds_conn_path_drop(&conn->c_path[0], false); } EXPORT_SYMBOL_GPL(rds_conn_drop); @@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) vprintk(fmt, ap); va_end(ap); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } diff --git a/net/rds/rds.h b/net/rds/rds.h index 516bcc89b46f..3382695bf46c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); -void rds_conn_path_drop(struct rds_conn_path *cpath); +void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); void rds_for_each_conn_info(struct socket *sock, unsigned int len, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 431404dbdad1..6b7ee71f40c6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net) continue; /* reconnect with new parameters */ - rds_conn_path_drop(tc->t_cpath); + rds_conn_path_drop(tc->t_cpath, false); } spin_unlock_irq(&rds_tcp_conn_lock); } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index cbe08a1fa4c7..46f74dad0e16 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk) if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } else { rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); default: break; } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 0d8616aa5bad..dc860d1bb608 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -157,7 +157,7 @@ out: "returned %d, " "disconnecting and reconnecting\n", &conn->c_faddr, cp->cp_index, ret); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } } } diff --git a/net/rds/threads.c b/net/rds/threads.c index 2852bc1d37d4..f121daa402c8 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) "current state is %d\n", __func__, atomic_read(&cp->cp_state)); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); return; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 69b97339ff9d..8c0db9b3e4ab 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -15,7 +15,7 @@ #include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> -#include <rxrpc/packet.h> +#include "protocol.h" #if 0 #define CHECK_SLAB_OKAY(X) \ diff --git a/include/rxrpc/packet.h b/net/rxrpc/protocol.h index a2dcfb850b9f..4bddcf3face3 100644 --- a/include/rxrpc/packet.h +++ b/net/rxrpc/protocol.h @@ -187,49 +187,4 @@ struct rxkad_response { __be32 ticket_len; /* Kerberos ticket length */ } __packed; -/*****************************************************************************/ -/* - * RxRPC-level abort codes - */ -#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ -#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ -#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ -#define RX_EOF -4 /* unexpected end of data on read op */ -#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ -#define RX_USER_ABORT -6 /* generic user abort */ -#define RX_ADDRINUSE -7 /* UDP port in use */ -#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ - -/* - * (un)marshalling abort codes (rxgen) - */ -#define RXGEN_CC_MARSHAL -450 -#define RXGEN_CC_UNMARSHAL -451 -#define RXGEN_SS_MARSHAL -452 -#define RXGEN_SS_UNMARSHAL -453 -#define RXGEN_DECODE -454 -#define RXGEN_OPCODE -455 -#define RXGEN_SS_XDRFREE -456 -#define RXGEN_CC_XDRFREE -457 - -/* - * Rx kerberos security abort codes - * - unfortunately we have no generalised security abort codes to say things - * like "unsupported security", so we have to use these instead and hope the - * other side understands - */ -#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ -#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ -#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ -#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ -#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ -#define RXKADNOAUTH 19270405 /* caller not authorised */ -#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ -#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ -#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ -#define RXKADEXPIRED 19270409 /* authentication expired */ -#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ -#define RXKADDATALEN 19270411 /* user data too long */ -#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ - #endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2e9ed34a963..f19b118df414 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -110,6 +110,8 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + u32 act_flags = cb->args[2]; + unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; spin_lock_bh(&hinfo->lock); @@ -127,6 +129,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, if (index < s_i) continue; + if (jiffy_since && + time_after(jiffy_since, + (unsigned long)p->tcfa_tm.lastuse)) + continue; + nest = nla_nest_start(skb, n_i); if (nest == NULL) goto nla_put_failure; @@ -138,14 +145,20 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, } nla_nest_end(skb, nest); n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) goto done; } } done: + if (index >= 0) + cb->args[0] = index + 1; + spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; + if (n_i) { + if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + cb->args[1] = n_i; + } return n_i; nla_put_failure: @@ -1068,11 +1081,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } +static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { + [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &tcaa_root_flags_allowed }, + [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, +}; + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct nlattr *tca[TCA_ACT_MAX + 1]; + struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; @@ -1080,7 +1100,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1121,16 +1141,12 @@ replay: return ret; } -static struct nlattr *find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, - NULL, NULL) < 0) - return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; @@ -1157,8 +1173,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); - struct nlattr *kind = find_dump_kind(cb->nlh); + struct nlattr *tb[TCA_ROOT_MAX + 1]; + struct nlattr *count_attr = NULL; + unsigned long jiffy_since = 0; + struct nlattr *kind = NULL; + struct nla_bitfield32 bf; + u32 msecs_since = 0; + u32 act_count = 0; + + ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, + tcaa_policy, NULL); + if (ret < 0) + return ret; + kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; @@ -1168,14 +1196,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o == NULL) return 0; + cb->args[2] = 0; + if (tb[TCA_ROOT_FLAGS]) { + bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); + cb->args[2] = bf.value; + } + + if (tb[TCA_ROOT_TIME_DELTA]) { + msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); + } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; + + if (msecs_since) + jiffy_since = jiffies - msecs_to_jiffies(msecs_since); + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; + cb->args[3] = jiffy_since; + count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); + if (!count_attr) + goto out_module_put; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) @@ -1188,6 +1234,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; + act_count = cb->args[1]; + memcpy(nla_data(count_attr), &act_count, sizeof(u32)); + cb->args[1] = 0; } else nlmsg_trim(skb, b); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da..67afc12df88b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -231,9 +231,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -287,9 +284,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sctp/auth.c b/net/sctp/auth.c index e001b01b0e68..00667c50efa7 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * are called the two key vectors. */ static struct sctp_auth_bytes *sctp_auth_make_key_vector( - sctp_random_param_t *random, - sctp_chunks_param_t *chunks, - sctp_hmac_algo_param_t *hmacs, + struct sctp_random_param *random, + struct sctp_chunks_param *chunks, + struct sctp_hmac_algo_param *hmacs, gfp_t gfp) { struct sctp_auth_bytes *new; @@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t *)asoc->c.auth_random, - (sctp_chunks_param_t *)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, - gfp); + (struct sctp_random_param *)asoc->c.auth_random, + (struct sctp_chunks_param *)asoc->c.auth_chunks, + (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp); } /* Make a key vector based on peer's parameters */ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 1323d41e68b8..681b181e7ae3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max_data) - first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); + first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 0e86f988f836..3d506b2f6193 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. */ - auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + - sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + auth_hmacs = kzalloc(sizeof(*auth_hmacs) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); if (!auth_hmacs) goto nomem; - auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + - SCTP_NUM_CHUNK_TYPES, gfp); + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..107d7c912922 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr, static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv6addr_param_t); + int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e8762702a313..d2a8adfd4a6f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1197,7 +1197,7 @@ sctp_flush_out: static void sctp_sack_update_unack_data(struct sctp_association *assoc, struct sctp_sackhdr *sack) { - sctp_sack_variable_t *frags; + union sctp_sack_variable *frags; __u16 unack_data; int i; @@ -1224,7 +1224,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; - sctp_sack_variable_t *frags = sack->variable; + union sctp_sack_variable *frags = sack->variable; __u32 sack_ctsn, ctsn, tsn; __u32 highest_tsn, highest_new_tsn; __u32 sack_a_rwnd; @@ -1736,10 +1736,10 @@ static void sctp_mark_missing(struct sctp_outq *q, /* Is the given TSN acked by this packet? */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { - int i; - sctp_sack_variable_t *frags; - __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); + union sctp_sack_variable *frags; + __u16 tsn_offset, blocks; + int i; if (TSN_lte(tsn, ctsn)) goto pass; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 989a900383b5..852556d67ae3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, static int sctp_v4_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv4addr_param_t); + int length = sizeof(struct sctp_ipv4addr_param); param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS; param->v4.param_hdr.length = htons(length); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6110447fe51d..163004e7047c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp); -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, int *cookie_len, @@ -223,10 +224,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; struct sctp_sock *sp; - sctp_supported_addrs_param_t sat; + struct sctp_supported_addrs_param sat; __be16 types[2]; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -305,8 +306,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -348,10 +348,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, */ if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } @@ -390,11 +388,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, union sctp_params addrs; struct sctp_sock *sp; int addrs_len; - sctp_cookie_param_t *cookie; + struct sctp_cookie_param *cookie; int cookie_len; size_t chunksize; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -468,8 +466,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -495,10 +492,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) @@ -1601,14 +1596,15 @@ nodata: /* Build a cookie representing asoc. * This INCLUDES the param header needed to put the cookie in the INIT ACK. */ -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const struct sctp_chunk *init_chunk, - int *cookie_len, - const __u8 *raw_addrs, int addrs_len) +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const struct sctp_chunk *init_chunk, + int *cookie_len, + const __u8 *raw_addrs, int addrs_len) { - sctp_cookie_param_t *retval; struct sctp_signed_cookie *cookie; + struct sctp_cookie_param *retval; int headersize, bodysize; /* Header size is static data prior to the actual cookie, including @@ -3153,7 +3149,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: - if (length != sizeof(sctp_ipv4addr_param_t)) + if (length != sizeof(struct sctp_ipv4addr_param)) return false; /* ensure there is only one addr param and it's in the * beginning of addip_hdr params, or we reject it. @@ -3163,7 +3159,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: - if (length != sizeof(sctp_ipv6addr_param_t)) + if (length != sizeof(struct sctp_ipv6addr_param)) return false; if (param.v != addip->addip_hdr.params) return false; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b2a74c3823ee..dc0c2c4188d8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -306,12 +306,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; int len; /* 6.10 Bundling @@ -435,7 +433,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -518,7 +516,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ @@ -1090,7 +1088,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_heartbeat_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1098,7 +1097,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ - chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data; + chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data; param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); @@ -1419,13 +1418,11 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - sctp_disposition_t retval; - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; + sctp_disposition_t retval; int len; /* 6.10 Bundling @@ -1555,7 +1552,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -2167,7 +2164,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2209,7 +2206,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2336,13 +2333,12 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - u32 stale; - sctp_cookie_preserve_param_t bht; - sctp_errhdr_t *err; - struct sctp_chunk *reply; - struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct sctp_chunk *chunk = arg, *reply; + struct sctp_cookie_preserve_param bht; + struct sctp_bind_addr *bp; + sctp_errhdr_t *err; + u32 stale; if (attempts > asoc->max_init_attempts) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, @@ -2474,7 +2470,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2550,7 +2546,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ @@ -3192,14 +3188,14 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4454,11 +4450,10 @@ static sctp_disposition_t sctp_sf_abort_violation( /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && !asoc->peer.i.init_tag) { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; - if (!sctp_chunk_length_valid(chunk, - sizeof(sctp_initack_chunk_t))) + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; + if (!sctp_chunk_length_valid(chunk, sizeof(*initack))) abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T; else { unsigned int inittag; @@ -4521,7 +4516,7 @@ nomem: * Handle a protocol violation when the chunk length is invalid. * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if its length is set to be smaller than the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of struct sctp_sack_chunk. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -6107,9 +6102,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; vtag = ntohl(initack->init_hdr.init_tag); break; } diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 33954852f3f8..c717ef0896aa 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,10 +8,6 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. - Warning: SMC will expose all memory for remote reads and writes - once a connection is established. Don't enable this option except - for tightly controlled lab environment. - Select this option if you want to run SMC socket applications config SMC_DIAG diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6793d7348cc8..8c6d24b2995d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -338,6 +338,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return SMC_CLC_DECL_INTERR; smc_wr_remember_qp_attr(link); + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -430,12 +436,8 @@ static int smc_connect_rdma(struct smc_sock *smc) smc_conn_save_peer_info(smc, &aclc); - rc = smc_sndbuf_create(smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma_unlock; - } - rc = smc_rmb_create(smc); + /* create send buffer and rmb */ + rc = smc_buf_create(smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; goto decline_rdma_unlock; @@ -459,7 +461,20 @@ static int smc_connect_rdma(struct smc_sock *smc) reason_code = SMC_CLC_DECL_INTERR; goto decline_rdma_unlock; } + } else { + struct smc_buf_desc *buf_desc = smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma_unlock; + } + } } + smc_rmb_sync_sg_for_device(&smc->conn); rc = smc_clc_send_confirm(smc); if (rc) @@ -692,6 +707,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) int rc; link = &lgr->lnk[SMC_SINGLE_LINK]; + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK request to client over the RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -779,11 +800,6 @@ static void smc_listen_work(struct work_struct *work) mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, smcibdev, ibport, &pclc.lcl, 0); - if (local_contact == SMC_REUSE_CONTACT) - /* lock no longer needed, free it due to following - * smc_clc_wait_msg() call - */ - mutex_unlock(&smc_create_lgr_pending); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -794,12 +810,8 @@ static void smc_listen_work(struct work_struct *work) } link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; - rc = smc_sndbuf_create(new_smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma; - } - rc = smc_rmb_create(new_smc); + /* create send buffer and rmb */ + rc = smc_buf_create(new_smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; goto decline_rdma; @@ -808,6 +820,21 @@ static void smc_listen_work(struct work_struct *work) smc_close_init(new_smc); smc_rx_init(new_smc); + if (local_contact != SMC_FIRST_CONTACT) { + struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma; + } + } + } + smc_rmb_sync_sg_for_device(&new_smc->conn); + rc = smc_clc_send_accept(new_smc, local_contact); if (rc) goto out_err; @@ -853,8 +880,7 @@ out_connected: if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; enqueue: - if (local_contact == SMC_FIRST_CONTACT) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_create_lgr_pending); lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { smc_accept_enqueue(&lsmc->sk, newsmcsk); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 03ec058d18df..3934913ab835 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -204,13 +204,13 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + cclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(cclc.psn, link->psn_initial); memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -256,13 +256,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + aclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(aclc.psn, link->psn_initial); memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3ac09a629ea1..1a16d51e2330 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -175,7 +175,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, rc = smc_wr_alloc_link_mem(lnk); if (rc) goto free_lgr; - init_waitqueue_head(&lnk->wr_tx_wait); rc = smc_ib_create_protection_domain(lnk); if (rc) goto free_link_mem; @@ -207,17 +206,14 @@ out: return rc; } -static void smc_sndbuf_unuse(struct smc_connection *conn) +static void smc_buf_unuse(struct smc_connection *conn) { if (conn->sndbuf_desc) { conn->sndbuf_desc->used = 0; conn->sndbuf_size = 0; } -} - -static void smc_rmb_unuse(struct smc_connection *conn) -{ if (conn->rmb_desc) { + conn->rmb_desc->reused = true; conn->rmb_desc->used = 0; conn->rmbe_size = 0; } @@ -232,8 +228,7 @@ void smc_conn_free(struct smc_connection *conn) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); - smc_rmb_unuse(conn); - smc_sndbuf_unuse(conn); + smc_buf_unuse(conn); } static void smc_link_clear(struct smc_link *lnk) @@ -246,48 +241,57 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_lgr_free_sndbufs(struct smc_link_group *lgr) +static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, + bool is_rmb) { - struct smc_buf_desc *sndbuf_desc, *bf_desc; - int i; - - for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i], - list) { - list_del(&sndbuf_desc->list); - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - smc_uncompress_bufsize(i), - sndbuf_desc, DMA_TO_DEVICE); - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - } + if (is_rmb) { + if (buf_desc->mr_rx[SMC_SINGLE_LINK]) + smc_ib_put_memory_region( + buf_desc->mr_rx[SMC_SINGLE_LINK]); + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_FROM_DEVICE); + } else { + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_TO_DEVICE); } + sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); + if (buf_desc->cpu_addr) + free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order); + kfree(buf_desc); } -static void smc_lgr_free_rmbs(struct smc_link_group *lgr) +static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { - struct smc_buf_desc *rmb_desc, *bf_desc; struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + struct smc_buf_desc *buf_desc, *bf_desc; + struct list_head *buf_list; int i; for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i], + if (is_rmb) + buf_list = &lgr->rmbs[i]; + else + buf_list = &lgr->sndbufs[i]; + list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list) { - list_del(&rmb_desc->list); - smc_ib_buf_unmap(lnk->smcibdev, - smc_uncompress_bufsize(i), - rmb_desc, DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); + list_del(&buf_desc->list); + smc_buf_free(buf_desc, lnk, is_rmb); } } } +static void smc_lgr_free_bufs(struct smc_link_group *lgr) +{ + /* free send buffers */ + __smc_lgr_free_bufs(lgr, false); + /* free rmbs */ + __smc_lgr_free_bufs(lgr, true); +} + /* remove a link group */ void smc_lgr_free(struct smc_link_group *lgr) { - smc_lgr_free_rmbs(lgr); - smc_lgr_free_sndbufs(lgr); + smc_lgr_free_bufs(lgr); smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); } @@ -452,45 +456,25 @@ out: return rc ? rc : local_contact; } -/* try to reuse a sndbuf description slot of the sndbufs list for a certain - * buf_size; if not available, return NULL +/* try to reuse a sndbuf or rmb description slot for a certain + * buffer size; if not available, return NULL */ static inline -struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) +struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, + int compressed_bufsize, + rwlock_t *lock, + struct list_head *buf_list) { - struct smc_buf_desc *sndbuf_slot; - - read_lock_bh(&lgr->sndbufs_lock); - list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize], - list) { - if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->sndbufs_lock); - return sndbuf_slot; - } - } - read_unlock_bh(&lgr->sndbufs_lock); - return NULL; -} + struct smc_buf_desc *buf_slot; -/* try to reuse an rmb description slot of the rmbs list for a certain - * rmbe_size; if not available, return NULL - */ -static inline -struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) -{ - struct smc_buf_desc *rmb_slot; - - read_lock_bh(&lgr->rmbs_lock); - list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize], - list) { - if (cmpxchg(&rmb_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->rmbs_lock); - return rmb_slot; + read_lock_bh(lock); + list_for_each_entry(buf_slot, buf_list, list) { + if (cmpxchg(&buf_slot->used, 0, 1) == 0) { + read_unlock_bh(lock); + return buf_slot; } } - read_unlock_bh(&lgr->rmbs_lock); + read_unlock_bh(lock); return NULL; } @@ -503,136 +487,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -/* create the tx buffer for an SMC socket */ -int smc_sndbuf_create(struct smc_sock *smc) +static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, + bool is_rmb, int bufsize) { - struct smc_connection *conn = &smc->conn; - struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *sndbuf_desc; + struct smc_buf_desc *buf_desc; + struct smc_link *lnk; int rc; - /* use socket send buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable sndbuf_slot in the link group */ - sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short); - if (sndbuf_desc) { - memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize); - break; /* found reusable slot */ - } - /* try to alloc a new send buffer */ - sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL); - if (!sndbuf_desc) - break; /* give up with -ENOMEM */ - sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!sndbuf_desc->cpu_addr) { - kfree(sndbuf_desc); - sndbuf_desc = NULL; - /* if send buffer allocation has failed, - * try a smaller one - */ - continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, sndbuf_desc, - DMA_TO_DEVICE); + /* try to alloc a new buffer */ + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return ERR_PTR(-ENOMEM); + + buf_desc->cpu_addr = + (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_ZERO, + get_order(bufsize)); + if (!buf_desc->cpu_addr) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->order = get_order(bufsize); + + /* build the sg table from the pages */ + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, + GFP_KERNEL); + if (rc) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); + } + sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, + buf_desc->cpu_addr, bufsize); + + /* map sg table to DMA address */ + rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + /* SMC protocol depends on mapping to one DMA address only */ + if (rc != 1) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(-EAGAIN); + } + + /* create a new memory region for the RMB */ + if (is_rmb) { + rc = smc_ib_get_memory_region(lnk->roce_pd, + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE, + buf_desc); if (rc) { - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - sndbuf_desc = NULL; - continue; /* if mapping failed, try smaller one */ + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); } - sndbuf_desc->used = 1; - write_lock_bh(&lgr->sndbufs_lock); - list_add(&sndbuf_desc->list, - &lgr->sndbufs[tmp_bufsize_short]); - write_unlock_bh(&lgr->sndbufs_lock); - break; - } - if (sndbuf_desc && sndbuf_desc->cpu_addr) { - conn->sndbuf_desc = sndbuf_desc; - conn->sndbuf_size = tmp_bufsize; - smc->sk.sk_sndbuf = tmp_bufsize * 2; - atomic_set(&conn->sndbuf_space, tmp_bufsize); - return 0; - } else { - return -ENOMEM; } + + return buf_desc; } -/* create the RMB for an SMC socket (even though the SMC protocol - * allows more than one RMB-element per RMB, the Linux implementation - * uses just one RMB-element per RMB, i.e. uses an extra RMB for every - * connection in a link group - */ -int smc_rmb_create(struct smc_sock *smc) +static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *rmb_desc; - int rc; + struct smc_buf_desc *buf_desc = NULL; + struct list_head *buf_list; + int bufsize, bufsize_short; + int sk_buf_size; + rwlock_t *lock; + + if (is_rmb) + /* use socket recv buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_rcvbuf / 2; + else + /* use socket send buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_sndbuf / 2; + + for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); + bufsize_short >= 0; bufsize_short--) { + + if (is_rmb) { + lock = &lgr->rmbs_lock; + buf_list = &lgr->rmbs[bufsize_short]; + } else { + lock = &lgr->sndbufs_lock; + buf_list = &lgr->sndbufs[bufsize_short]; + } + bufsize = smc_uncompress_bufsize(bufsize_short); + if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) + continue; - /* use socket recv buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable rmb_slot in the link group */ - rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short); - if (rmb_desc) { - memset(rmb_desc->cpu_addr, 0, tmp_bufsize); + /* check for reusable slot in the link group */ + buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); + if (buf_desc) { + memset(buf_desc->cpu_addr, 0, bufsize); break; /* found reusable slot */ } - /* try to alloc a new RMB */ - rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL); - if (!rmb_desc) - break; /* give up with -ENOMEM */ - rmb_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!rmb_desc->cpu_addr) { - kfree(rmb_desc); - rmb_desc = NULL; - /* if RMB allocation has failed, - * try a smaller one - */ + + buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); + if (PTR_ERR(buf_desc) == -ENOMEM) + break; + if (IS_ERR(buf_desc)) continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - if (rc) { - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; /* if mapping failed, try smaller one */ - } - rmb_desc->rkey[SMC_SINGLE_LINK] = - lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; - rmb_desc->used = 1; - write_lock_bh(&lgr->rmbs_lock); - list_add(&rmb_desc->list, - &lgr->rmbs[tmp_bufsize_short]); - write_unlock_bh(&lgr->rmbs_lock); - break; + + buf_desc->used = 1; + write_lock_bh(lock); + list_add(&buf_desc->list, buf_list); + write_unlock_bh(lock); + break; /* found */ } - if (rmb_desc && rmb_desc->cpu_addr) { - conn->rmb_desc = rmb_desc; - conn->rmbe_size = tmp_bufsize; - conn->rmbe_size_short = tmp_bufsize_short; - smc->sk.sk_rcvbuf = tmp_bufsize * 2; + + if (IS_ERR(buf_desc)) + return -ENOMEM; + + if (is_rmb) { + conn->rmb_desc = buf_desc; + conn->rmbe_size = bufsize; + conn->rmbe_size_short = bufsize_short; + smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); - conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize); - return 0; + conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); } else { - return -ENOMEM; + conn->sndbuf_desc = buf_desc; + conn->sndbuf_size = bufsize; + smc->sk.sk_sndbuf = bufsize * 2; + atomic_set(&conn->sndbuf_space, bufsize); } + return 0; +} + +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +void smc_rmb_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +/* create the send and receive buffer for an SMC socket; + * receive buffers are called RMBs; + * (even though the SMC protocol allows more than one RMB-element per RMB, + * the Linux implementation uses just one RMB-element per RMB, i.e. uses an + * extra RMB for every connection in a link group + */ +int smc_buf_create(struct smc_sock *smc) +{ + int rc; + + /* create send buffer */ + rc = __smc_buf_create(smc, false); + if (rc) + return rc; + /* create rmb */ + rc = __smc_buf_create(smc, true); + if (rc) + smc_buf_free(smc->conn.sndbuf_desc, + &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); + return rc; } static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b013cb43a327..19c44bf4e391 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -37,6 +37,14 @@ struct smc_wr_buf { u8 raw[SMC_WR_BUF_SIZE]; }; +#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ + +enum smc_wr_reg_state { + POSTED, /* ib_wr_reg_mr request posted */ + CONFIRMED, /* ib_wr_reg_mr response: successful */ + FAILED /* ib_wr_reg_mr response: failure */ +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -65,6 +73,10 @@ struct smc_link { u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ + struct ib_reg_wr wr_reg; /* WR register memory region */ + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ + enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ + union ib_gid gid; /* gid matching used vlan id */ u32 peer_qpn; /* QP number of peer */ enum ib_mtu path_mtu; /* used mtu */ @@ -90,14 +102,15 @@ struct smc_link { /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ struct smc_buf_desc { struct list_head list; - u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; - /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - u32 rkey[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: - * rkey provided to peer + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer */ + u32 order; /* allocation order */ u32 used; /* currently used / unused */ + bool reused; /* new created / reused */ }; struct smc_rtoken { /* address/key of remote RMB */ @@ -173,9 +186,11 @@ struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); -int smc_sndbuf_create(struct smc_sock *smc); -int smc_rmb_create(struct smc_sock *smc); +int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); - +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); +void smc_rmb_sync_sg_for_device(struct smc_connection *conn); #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index b31715505a35..547e0e113b17 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -13,6 +13,7 @@ #include <linux/random.h> #include <linux/workqueue.h> +#include <linux/scatterlist.h> #include <rdma/ib_verbs.h> #include "smc_pnet.h" @@ -192,8 +193,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, - IB_PD_UNSAFE_GLOBAL_RKEY); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; @@ -232,10 +232,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { - .max_send_wr = SMC_WR_BUF_CNT, /* include unsolicited rdma_writes as well, * there are max. 2 RDMA_WRITE per 1 WR_SEND */ + .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, @@ -254,33 +254,117 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) return rc; } -/* map a new TX or RX buffer to DMA */ -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction) +void smc_ib_put_memory_region(struct ib_mr *mr) { - int rc = 0; + ib_dereg_mr(mr); +} - if (buf_slot->dma_addr[SMC_SINGLE_LINK]) - return rc; /* already mapped */ - buf_slot->dma_addr[SMC_SINGLE_LINK] = - ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr, - buf_size, data_direction); - if (ib_dma_mapping_error(smcibdev->ibdev, - buf_slot->dma_addr[SMC_SINGLE_LINK])) - rc = -EIO; - return rc; +static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot) +{ + unsigned int offset = 0; + int sg_num; + + /* map the largest prefix of a dma mapped SG list */ + sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK], + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + &offset, PAGE_SIZE); + + return sg_num; +} + +/* Allocate a memory region and map the dma mapped SG list of buf_slot */ +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot) +{ + if (buf_slot->mr_rx[SMC_SINGLE_LINK]) + return 0; /* already done */ + + buf_slot->mr_rx[SMC_SINGLE_LINK] = + ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); + if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) { + int rc; + + rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]); + buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL; + return rc; + } + + if (smc_ib_map_mr_sg(buf_slot) != 1) + return -EINVAL; + + return 0; } -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size, +/* synchronize buffer usage for cpu access */ +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_cpu(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } +} + +/* synchronize buffer usage for device access */ +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_device(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } +} + +/* Map a new TX or RX buffer SG-table to DMA */ +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { - if (!buf_slot->dma_addr[SMC_SINGLE_LINK]) + int mapped_nents; + + mapped_nents = ib_dma_map_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + if (!mapped_nents) + return -ENOMEM; + + return mapped_nents; +} + +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address) return; /* already unmapped */ - ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size, - data_direction); - buf_slot->dma_addr[SMC_SINGLE_LINK] = 0; + + ib_dma_unmap_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; } static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index b567152a526d..9b927a33d5e6 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -51,12 +51,12 @@ int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction); -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize, +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); @@ -65,6 +65,13 @@ int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); - - +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot); +void smc_ib_put_memory_region(struct ib_mr *mr); +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); #endif diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index f0c8b089f770..b17a333e9bb0 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -170,6 +170,7 @@ copy: copylen, conn->rmbe_size - cons.count); chunk_len_sum = chunk_len; chunk_off = cons.count; + smc_rmb_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { if (!(flags & MSG_TRUNC)) { rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off, @@ -177,6 +178,7 @@ copy: if (rc) { if (!read_done) read_done = -EFAULT; + smc_rmb_sync_sg_for_device(conn); goto out; } } @@ -190,6 +192,7 @@ copy: chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in recv ring buffer */ } + smc_rmb_sync_sg_for_device(conn); /* update cursors */ if (!(flags & MSG_PEEK)) { diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 21ec1832ab51..3c656beb8820 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -174,10 +174,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) copylen, conn->sndbuf_size - tx_cnt_prep); chunk_len_sum = chunk_len; chunk_off = tx_cnt_prep; + smc_sndbuf_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { rc = memcpy_from_msg(sndbuf_base + chunk_off, msg, chunk_len); if (rc) { + smc_sndbuf_sync_sg_for_device(conn); if (send_done) return send_done; goto out_err; @@ -192,6 +194,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in send ring buffer */ } + smc_sndbuf_sync_sg_for_device(conn); /* update cursors */ smc_curs_add(conn->sndbuf_size, &prep, copylen); smc_curs_write(&conn->tx_curs_prep, @@ -277,6 +280,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int to_send, rmbespace; struct smc_link *link; + dma_addr_t dma_addr; int num_sges; int rc; @@ -334,12 +338,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) src_len = conn->sndbuf_size - sent.count; } src_len_sum = src_len; + dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); for (dstchunk = 0; dstchunk < 2; dstchunk++) { num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = - conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] + - src_off; + sges[srcchunk].addr = dma_addr + src_off; sges[srcchunk].length = src_len; sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; num_sges++; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 874ee9f9d796..ab56bda66783 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -68,6 +68,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) int i; link = wc->qp->qp_context; + + if (wc->opcode == IB_WC_REG_MR) { + if (wc->status) + link->wr_reg_state = FAILED; + else + link->wr_reg_state = CONFIRMED; + wake_up(&link->wr_reg_wait); + return; + } + pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); if (pnd_snd_idx == link->wr_tx_cnt) return; @@ -243,6 +253,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +/* Register a memory region and wait for result. */ +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) +{ + struct ib_send_wr *failed_wr = NULL; + int rc; + + ib_req_notify_cq(link->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + link->wr_reg_state = POSTED; + link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; + link->wr_reg.mr = mr; + link->wr_reg.key = mr->rkey; + failed_wr = &link->wr_reg.wr; + rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); + WARN_ON(failed_wr != &link->wr_reg.wr); + if (rc) + return rc; + + rc = wait_event_interruptible_timeout(link->wr_reg_wait, + (link->wr_reg_state != POSTED), + SMC_WR_REG_MR_WAIT_TIME); + if (!rc) { + /* timeout - terminate connections */ + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_lgr_terminate(lgr); + return -EPIPE; + } + if (rc == -ERESTARTSYS) + return -EINTR; + switch (link->wr_reg_state) { + case CONFIRMED: + rc = 0; + break; + case FAILED: + rc = -EIO; + break; + case POSTED: + rc = -EPIPE; + break; + } + return rc; +} + void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, @@ -458,6 +514,11 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; lnk->wr_rx_ibs[i].num_sge = 1; } + lnk->wr_reg.wr.next = NULL; + lnk->wr_reg.wr.num_sge = 0; + lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED; + lnk->wr_reg.wr.opcode = IB_WR_REG_MR; + lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; } void smc_wr_free_link(struct smc_link *lnk) @@ -602,6 +663,8 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + init_waitqueue_head(&lnk->wr_tx_wait); + init_waitqueue_head(&lnk->wr_reg_wait); return rc; dma_unmap: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 0b9beeda6053..45eb53833052 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -102,5 +102,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context); +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr); #endif /* SMC_WR_H */ diff --git a/net/socket.c b/net/socket.c index ad22df1ffbd1..cb0fdf799f40 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3405,7 +3405,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk) struct inet_sock *inet; struct ip_options_rcu *opt; u32 overhead = 0; - bool owned_by_user; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np; struct ipv6_txoptions *optv6 = NULL; @@ -3414,13 +3413,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk) if (!sk) return overhead; - owned_by_user = sock_owned_by_user(sk); switch (sk->sk_family) { case AF_INET: inet = inet_sk(sk); overhead += sizeof(struct iphdr); opt = rcu_dereference_protected(inet->inet_opt, - owned_by_user); + sock_owned_by_user(sk)); if (opt) overhead += opt->opt.optlen; return overhead; @@ -3430,7 +3428,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk) overhead += sizeof(struct ipv6hdr); if (np) optv6 = rcu_dereference_protected(np->opt, - owned_by_user); + sock_owned_by_user(sk)); if (optv6) overhead += (optv6->opt_flen + optv6->opt_nflen); return overhead; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..5c53f22d62e8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p) return false; } -#define MAX_RECURSION_LEVEL 4 - static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - unsigned char max_level = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; - /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a @@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + return 0; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; long timeo; struct scm_cookie scm; - int max_level; int data_len = 0; int sk_locked; @@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; - max_level = err + 1; skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1819,8 +1804,6 @@ restart_locked: __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int max_level; int data_len; wait_for_unix_gc(); @@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, kfree_skb(skb); goto out_err; } - max_level = err + 1; fds_sent = true; skb_put(skb, size - data_len); @@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2324,7 +2303,6 @@ redo: last_len = last ? last->len : 0; again: if (skb == NULL) { - unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f7e8bfa0c2d..5cd7a244e88d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev) { + xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_garbage_collect(dev_net(dev)); - + xfrm_policy_cache_flush(); return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..06c3bf7ab86b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -24,6 +24,7 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/cpu.h> #include <linux/audit.h> #include <net/dst.h> #include <net/flow.h> @@ -44,6 +45,8 @@ struct xfrm_flo { u8 flags; }; +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); +static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -246,36 +249,6 @@ expired: xfrm_pol_put(xp); } -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; -} - -static int xfrm_policy_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; -} - -static void xfrm_policy_flo_delete(struct flow_cache_object *flo) -{ - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); -} - -static const struct flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, -}; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) (unsigned long)policy); setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1175,7 +1148,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static int flow_to_policy_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } -} - -static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -{ - struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; -} - -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) return tos; } -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. */ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; -} - -static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; -} - -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); -} - -static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, -}; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) struct dst_entry *dst = &xdst->u.dst; memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS); @@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) +{ + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); +} + +static void __xfrm_pcpu_work_fn(void) +{ + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); +} + +static void xfrm_pcpu_work_fn(struct work_struct *work) +{ + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); +} + +void xfrm_policy_cache_flush(void) +{ + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); +} + +static bool xfrm_pol_dead(struct xfrm_dst *xdst) +{ + unsigned int num_pols = xdst->num_pols; + unsigned int pol_dead = 0, i; + + for (i = 0; i < num_pols; i++) + pol_dead |= xdst->pols[i]->walk.dead; + + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + if (pol_dead) + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + + return pol_dead; +} + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err; + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + !xfrm_pol_dead(xdst) && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0) { + dst_hold(&xdst->u.dst); + return xdst; + } + + old = xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); + atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; } @@ -2051,86 +2027,39 @@ free_dst: goto out; } -static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) +static struct xfrm_dst * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - - /* Check if the policies from old bundle are usable */ - xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } + int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if (num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; - } else if (new_xdst == NULL) { + goto make_dummy_bundle; + } else if (xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; } - /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); - return &new_xdst->flo; + return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@ -2146,17 +2075,12 @@ make_dummy_bundle: memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } @@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; @@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo); num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; @@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1; reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) { - struct flow_cache_object *flo; - - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -void xfrm_garbage_collect(struct net *net) -{ - flow_cache_flush(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect); - -void xfrm_garbage_collect_deferred(struct net *net) -{ - flow_cache_flush_deferred(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out; return 0; -out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -3054,7 +2946,6 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - flow_cache_hp_init(); + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..82cbbce69b79 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -724,9 +724,10 @@ restart: } } } - if (cnt) + if (cnt) { err = 0; - + xfrm_policy_cache_flush(); + } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2be4c6af008a..1b539b7dcfab 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } - xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87246be6feb8..770d46cdf9f4 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -37,6 +37,8 @@ hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops +hostprogs-y += xdp_redirect +hostprogs-y += xdp_redirect_map # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -78,6 +80,8 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o +xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o +xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -119,6 +123,8 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += xdp_redirect_kern.o +always += xdp_redirect_map_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -155,6 +161,8 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_xdp_redirect += -lelf +HOSTLOADLIBES_xdp_redirect_map += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c new file mode 100644 index 000000000000..a34ad457a684 --- /dev/null +++ b/samples/bpf/xdp_redirect_kern.c @@ -0,0 +1,81 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect") +int xdp_redirect_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + int *ifindex, port = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + ifindex = bpf_map_lookup_elem(&tx_port, &port); + if (!ifindex) + return rc; + + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + return bpf_redirect(*ifindex, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c new file mode 100644 index 000000000000..2faf196e17ea --- /dev/null +++ b/samples/bpf/xdp_redirect_map_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 100, +}; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect_map") +int xdp_redirect_map_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + int vport, port = 0, m = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + /* constant virtual port */ + vport = 0; + + /* count packet in global counter */ + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + + /* send packet out physical port */ + return bpf_redirect_map(&tx_port, vport, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c new file mode 100644 index 000000000000..a1ad00fdaa8a --- /dev/null +++ b/samples/bpf/xdp_redirect_map_user.c @@ -0,0 +1,137 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", + map_fd[0], map_fd[1], map_fd[2]); + + /* populate virtual to physical port map */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c new file mode 100644 index 000000000000..f705a1905d2d --- /dev/null +++ b/samples/bpf/xdp_redirect_user.c @@ -0,0 +1,134 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + /* bpf redirect port */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 1450f85b946d..36a7ce9e11ff 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void) struct net *net; rtnl_lock(); - for_each_net(net) { - atomic_inc(&net->xfrm.flow_cache_genid); + for_each_net(net) rt_genid_bump_all(net); - } rtnl_unlock(); } #else diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 89b25068cd98..80f102860cf8 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -211,6 +211,30 @@ function create_bond { echo $'\nBond name:' $bondname + if [ $distro == ubuntu ] + then + local mainfn=$cfgdir/interfaces + local s="^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+${bondname}" + + grep -E "$s" $mainfn + if [ $? -eq 0 ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + elif [ $distro == redhat ] || [ $distro == suse ] + then + local fn=$cfgdir/ifcfg-$bondname + if [ -f $fn ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + else + echo "Unsupported Distro: ${distro}" + return + fi + echo configuring $primary create_eth_cfg_pri_$distro $primary $bondname @@ -219,8 +243,6 @@ function create_bond { echo creating: $bondname with primary slave: $primary create_bond_cfg_$distro $bondname $primary $secondary - - let bondcnt=bondcnt+1 } for (( i=0; i < $eth_cnt-1; i++ )) @@ -228,5 +250,6 @@ do if [ -n "${list_match[$i]}" ] then create_bond ${list_eth[$i]} ${list_match[$i]} + let bondcnt=bondcnt+1 fi done diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ce2988be4f0e..1579cab49717 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1f5300e56b44..445289555487 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -189,6 +189,10 @@ install_lib: all_cmd $(call QUIET_INSTALL, $(LIB_FILE)) \ $(call do_install,$(LIB_FILE),$(libdir_SQ)) +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,bpf.h,$(prefix)/include/bpf,644) + install: install_lib ### Cleaning rules diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d50ac342dc92..acbd60519467 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; +static int (*bpf_redirect_map)(void *map, int key, int flags) = + (void *) BPF_FUNC_redirect_map; static int (*bpf_perf_event_output)(void *ctx, void *map, unsigned long long flags, void *data, int size) = diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 79601c81e169..c991ab69a720 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -438,6 +438,21 @@ static void test_arraymap_percpu_many_keys(void) close(fd); } +static void test_devmap(int task, void *data) +{ + int fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -605,6 +620,8 @@ static void run_all_tests(void) test_arraymap_percpu_many_keys(); + test_devmap(0, NULL); + test_map_large(); test_map_parallel(); test_map_stress(); |