diff options
129 files changed, 2878 insertions, 1022 deletions
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 05150957ecfd..3a6916909d90 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -29,6 +29,9 @@ The following properties are common to the Ethernet controllers: * "smii" * "xgmii" * "trgmii" + * "2000base-x", + * "2500base-x", + * "rxaui" - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 04910764c385..83a7ec4c16d0 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -105,7 +105,7 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) /***************************************************************************** * Ethernet switch ****************************************************************************/ -void __init orion5x_eth_switch_init(struct dsa_platform_data *d) +void __init orion5x_eth_switch_init(struct dsa_chip_data *d) { orion_ge00_switch_init(d); } diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index 8a4115bd441d..efeffc6b4ebb 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -3,7 +3,7 @@ #include <linux/reboot.h> -struct dsa_platform_data; +struct dsa_chip_data; struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -41,7 +41,7 @@ void orion5x_setup_wins(void); void orion5x_ehci0_init(void); void orion5x_ehci1_init(void); void orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data); -void orion5x_eth_switch_init(struct dsa_platform_data *d); +void orion5x_eth_switch_init(struct dsa_chip_data *d); void orion5x_i2c_init(void); void orion5x_sata_init(struct mv_sata_platform_data *sata_data); void orion5x_spi_init(void); diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c index dccadf68ea2b..a3c1336d30c9 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c @@ -101,11 +101,6 @@ static struct dsa_chip_data rd88f5181l_fxo_switch_chip_data = { .port_names[7] = "lan3", }; -static struct dsa_platform_data __initdata rd88f5181l_fxo_switch_plat_data = { - .nr_chips = 1, - .chip = &rd88f5181l_fxo_switch_chip_data, -}; - static void __init rd88f5181l_fxo_init(void) { /* @@ -120,7 +115,7 @@ static void __init rd88f5181l_fxo_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f5181l_fxo_eth_data); - orion5x_eth_switch_init(&rd88f5181l_fxo_switch_plat_data); + orion5x_eth_switch_init(&rd88f5181l_fxo_switch_chip_data); orion5x_uart0_init(); mvebu_mbus_add_window_by_id(ORION_MBUS_DEVBUS_BOOT_TARGET, diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c index affe5ec825de..252efe29bd1a 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c @@ -102,11 +102,6 @@ static struct dsa_chip_data rd88f5181l_ge_switch_chip_data = { .port_names[7] = "lan3", }; -static struct dsa_platform_data __initdata rd88f5181l_ge_switch_plat_data = { - .nr_chips = 1, - .chip = &rd88f5181l_ge_switch_chip_data, -}; - static struct i2c_board_info __initdata rd88f5181l_ge_i2c_rtc = { I2C_BOARD_INFO("ds1338", 0x68), }; @@ -125,7 +120,7 @@ static void __init rd88f5181l_ge_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f5181l_ge_eth_data); - orion5x_eth_switch_init(&rd88f5181l_ge_switch_plat_data); + orion5x_eth_switch_init(&rd88f5181l_ge_switch_chip_data); orion5x_i2c_init(); orion5x_uart0_init(); diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c index 67ee8571b03c..f4f1dbe1d91d 100644 --- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c @@ -40,11 +40,6 @@ static struct dsa_chip_data rd88f6183ap_ge_switch_chip_data = { .port_names[5] = "cpu", }; -static struct dsa_platform_data __initdata rd88f6183ap_ge_switch_plat_data = { - .nr_chips = 1, - .chip = &rd88f6183ap_ge_switch_chip_data, -}; - static struct mtd_partition rd88f6183ap_ge_partitions[] = { { .name = "kernel", @@ -89,7 +84,7 @@ static void __init rd88f6183ap_ge_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f6183ap_ge_eth_data); - orion5x_eth_switch_init(&rd88f6183ap_ge_switch_plat_data); + orion5x_eth_switch_init(&rd88f6183ap_ge_switch_chip_data); spi_register_board_info(rd88f6183ap_ge_spi_slave_info, ARRAY_SIZE(rd88f6183ap_ge_spi_slave_info)); orion5x_spi_init(); diff --git a/arch/arm/mach-orion5x/wnr854t-setup.c b/arch/arm/mach-orion5x/wnr854t-setup.c index 4dbcdbe1de7c..ac3376fc269f 100644 --- a/arch/arm/mach-orion5x/wnr854t-setup.c +++ b/arch/arm/mach-orion5x/wnr854t-setup.c @@ -124,7 +124,7 @@ static void __init wnr854t_init(void) * Configure peripherals. */ orion5x_eth_init(&wnr854t_eth_data); - orion5x_eth_switch_init(&wnr854t_switch_plat_data); + orion5x_eth_switch_init(&wnr854t_switch_chip_data); orion5x_uart0_init(); mvebu_mbus_add_window_by_id(ORION_MBUS_DEVBUS_BOOT_TARGET, diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c index a6a8c4648d74..9250bb2e429c 100644 --- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c +++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c @@ -191,11 +191,6 @@ static struct dsa_chip_data wrt350n_v2_switch_chip_data = { .port_names[7] = "lan4", }; -static struct dsa_platform_data __initdata wrt350n_v2_switch_plat_data = { - .nr_chips = 1, - .chip = &wrt350n_v2_switch_chip_data, -}; - static void __init wrt350n_v2_init(void) { /* @@ -210,7 +205,7 @@ static void __init wrt350n_v2_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&wrt350n_v2_eth_data); - orion5x_eth_switch_init(&wrt350n_v2_switch_plat_data); + orion5x_eth_switch_init(&wrt350n_v2_switch_chip_data); orion5x_uart0_init(); mvebu_mbus_add_window_by_id(ORION_MBUS_DEVBUS_BOOT_TARGET, diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 272f49b2c68f..9255b6d67ba5 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -22,6 +22,7 @@ #include <linux/platform_data/dma-mv_xor.h> #include <linux/platform_data/usb-ehci-orion.h> #include <plat/common.h> +#include <linux/phy.h> /* Create a clkdev entry for a given device/clk */ void __init orion_clkdev_add(const char *con_id, const char *dev_id, @@ -470,15 +471,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, /***************************************************************************** * Ethernet switch ****************************************************************************/ -void __init orion_ge00_switch_init(struct dsa_platform_data *d) +static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii"; +static __initdata struct mdio_board_info + orion_ge00_switch_board_info; + +void __init orion_ge00_switch_init(struct dsa_chip_data *d) { - int i; + struct mdio_board_info *bd; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(d->port_names); i++) + if (!strcmp(d->port_names[i], "cpu")) + break; - d->netdev = &orion_ge00.dev; - for (i = 0; i < d->nr_chips; i++) - d->chip[i].host_dev = &orion_ge_mvmdio.dev; + bd = &orion_ge00_switch_board_info; + bd->bus_id = orion_ge00_mvmdio_bus_name; + bd->mdio_addr = d->sw_addr; + d->netdev[i] = &orion_ge00.dev; + strcpy(bd->modalias, "mv88e6085"); + bd->platform_data = d; - platform_device_register_data(NULL, "dsa", 0, d, sizeof(d)); + mdiobus_register_board_info(&orion_ge00_switch_board_info, 1); } /***************************************************************************** diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index 9347f3c58a6d..3647d3b33c20 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -12,7 +12,7 @@ #include <linux/mv643xx_eth.h> #include <linux/platform_data/usb-ehci-orion.h> -struct dsa_platform_data; +struct dsa_chip_data; struct mv_sata_platform_data; void __init orion_uart0_init(void __iomem *membase, @@ -57,7 +57,7 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq); -void __init orion_ge00_switch_init(struct dsa_platform_data *d); +void __init orion_ge00_switch_init(struct dsa_chip_data *d); void __init orion_i2c_init(unsigned long mapbase, unsigned long irq, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3e70a9c5d79d..4eb5a80e5d81 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2467,14 +2467,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) struct net_device *dev; prio = rt_tos2priority(tos); - dev = ndev->priv_flags & IFF_802_1Q_VLAN ? - vlan_dev_real_dev(ndev) : ndev; - + dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); #if IS_ENABLED(CONFIG_VLAN_8021Q) - if (ndev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(ndev)) return (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; #endif diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 4abdeb359fb4..d9d15561eb5d 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -118,7 +118,7 @@ static struct device *dma_device(struct rxe_dev *rxe) ndev = rxe->ndev; - if (ndev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); return ndev->dev.parent; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e6af04716cf7..6321f12630c8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4145,8 +4145,6 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, - .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, - .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 7a85495dbb0c..0da4f2f5c7e3 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o obj-$(CONFIG_CAN_SLCAN) += slcan.o obj-$(CONFIG_CAN_DEV) += can-dev.o -can-dev-y := dev.o +can-dev-y += dev.o +can-dev-y += rx-offload.o can-dev-$(CONFIG_CAN_LEDS) += led.o diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 43cfce8b076b..ea57fed375c6 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -3,7 +3,8 @@ * * Copyright (c) 2005-2006 Varma Electronics Oy * Copyright (c) 2009 Sascha Hauer, Pengutronix - * Copyright (c) 2010 Marc Kleine-Budde, Pengutronix + * Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> + * Copyright (c) 2014 David Jander, Protonic Holland * * Based on code originally by Andrey Volkov <avolkov@varma-el.com> * @@ -24,6 +25,7 @@ #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/led.h> +#include <linux/can/rx-offload.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/interrupt.h> @@ -55,9 +57,10 @@ #define FLEXCAN_MCR_WAK_SRC BIT(19) #define FLEXCAN_MCR_DOZE BIT(18) #define FLEXCAN_MCR_SRX_DIS BIT(17) -#define FLEXCAN_MCR_BCC BIT(16) +#define FLEXCAN_MCR_IRMQ BIT(16) #define FLEXCAN_MCR_LPRIO_EN BIT(13) #define FLEXCAN_MCR_AEN BIT(12) +/* MCR_MAXMB: maximum used MBs is MAXMB + 1 */ #define FLEXCAN_MCR_MAXMB(x) ((x) & 0x7f) #define FLEXCAN_MCR_IDAM_A (0x0 << 8) #define FLEXCAN_MCR_IDAM_B (0x1 << 8) @@ -143,17 +146,20 @@ /* FLEXCAN interrupt flag register (IFLAG) bits */ /* Errata ERR005829 step7: Reserve first valid MB */ -#define FLEXCAN_TX_BUF_RESERVED 8 -#define FLEXCAN_TX_BUF_ID 9 -#define FLEXCAN_IFLAG_BUF(x) BIT(x) +#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8 +#define FLEXCAN_TX_MB_OFF_FIFO 9 +#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0 +#define FLEXCAN_TX_MB_OFF_TIMESTAMP 1 +#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_OFF_TIMESTAMP + 1) +#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST 63 +#define FLEXCAN_IFLAG_MB(x) BIT(x) #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7) #define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6) #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5) -#define FLEXCAN_IFLAG_DEFAULT \ - (FLEXCAN_IFLAG_RX_FIFO_OVERFLOW | FLEXCAN_IFLAG_RX_FIFO_AVAILABLE | \ - FLEXCAN_IFLAG_BUF(FLEXCAN_TX_BUF_ID)) /* FLEXCAN message buffers */ +#define FLEXCAN_MB_CODE_MASK (0xf << 24) +#define FLEXCAN_MB_CODE_RX_BUSY_BIT (0x1 << 24) #define FLEXCAN_MB_CODE_RX_INACTIVE (0x0 << 24) #define FLEXCAN_MB_CODE_RX_EMPTY (0x4 << 24) #define FLEXCAN_MB_CODE_RX_FULL (0x2 << 24) @@ -189,7 +195,9 @@ */ #define FLEXCAN_QUIRK_BROKEN_ERR_STATE BIT(1) /* [TR]WRN_INT not connected */ #define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ -#define FLEXCAN_QUIRK_DISABLE_MECR BIT(3) /* Disble Memory error detection */ +#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ +#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disble Memory error detection */ +#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ /* Structure of the message buffer */ struct flexcan_mb { @@ -213,7 +221,10 @@ struct flexcan_regs { u32 imask1; /* 0x28 */ u32 iflag2; /* 0x2c */ u32 iflag1; /* 0x30 */ - u32 ctrl2; /* 0x34 */ + union { /* 0x34 */ + u32 gfwr_mx28; /* MX28, MX53 */ + u32 ctrl2; /* MX6, VF610 */ + }; u32 esr2; /* 0x38 */ u32 imeur; /* 0x3c */ u32 lrfr; /* 0x40 */ @@ -232,7 +243,11 @@ struct flexcan_regs { * size conf'ed via ctrl2::RFFN * (mx6, vf610) */ - u32 _reserved4[408]; + u32 _reserved4[256]; /* 0x480 */ + u32 rximr[64]; /* 0x880 */ + u32 _reserved5[24]; /* 0x980 */ + u32 gfwr_mx6; /* 0x9e0 - MX6 */ + u32 _reserved6[63]; /* 0x9e4 */ u32 mecr; /* 0xae0 */ u32 erriar; /* 0xae4 */ u32 erridpr; /* 0xae8 */ @@ -249,31 +264,36 @@ struct flexcan_devtype_data { struct flexcan_priv { struct can_priv can; - struct napi_struct napi; + struct can_rx_offload offload; struct flexcan_regs __iomem *regs; - u32 reg_esr; + struct flexcan_mb __iomem *tx_mb; + struct flexcan_mb __iomem *tx_mb_reserved; + u8 tx_mb_idx; u32 reg_ctrl_default; + u32 reg_imask1_default; + u32 reg_imask2_default; struct clk *clk_ipg; struct clk *clk_per; - struct flexcan_platform_data *pdata; const struct flexcan_devtype_data *devtype_data; struct regulator *reg_xceiver; }; -static struct flexcan_devtype_data fsl_p1010_devtype_data = { +static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_ERR_STATE, }; -static struct flexcan_devtype_data fsl_imx28_devtype_data; +static const struct flexcan_devtype_data fsl_imx28_devtype_data; -static struct flexcan_devtype_data fsl_imx6q_devtype_data = { - .quirks = FLEXCAN_QUIRK_DISABLE_RXFG, +static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { + .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | + FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, }; -static struct flexcan_devtype_data fsl_vf610_devtype_data = { - .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_DISABLE_MECR, +static const struct flexcan_devtype_data fsl_vf610_devtype_data = { + .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | + FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, }; static const struct can_bittiming_const flexcan_bittiming_const = { @@ -331,13 +351,6 @@ static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv) return regulator_disable(priv->reg_xceiver); } -static inline int flexcan_has_and_handle_berr(const struct flexcan_priv *priv, - u32 reg_esr) -{ - return (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) && - (reg_esr & FLEXCAN_ESR_ERR_BUS); -} - static int flexcan_chip_enable(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->regs; @@ -468,7 +481,6 @@ static int flexcan_get_berr_counter(const struct net_device *dev, static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct flexcan_priv *priv = netdev_priv(dev); - struct flexcan_regs __iomem *regs = priv->regs; struct can_frame *cf = (struct can_frame *)skb->data; u32 can_id; u32 data; @@ -491,68 +503,73 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) if (cf->can_dlc > 0) { data = be32_to_cpup((__be32 *)&cf->data[0]); - flexcan_write(data, ®s->mb[FLEXCAN_TX_BUF_ID].data[0]); + flexcan_write(data, &priv->tx_mb->data[0]); } if (cf->can_dlc > 3) { data = be32_to_cpup((__be32 *)&cf->data[4]); - flexcan_write(data, ®s->mb[FLEXCAN_TX_BUF_ID].data[1]); + flexcan_write(data, &priv->tx_mb->data[1]); } can_put_echo_skb(skb, dev, 0); - flexcan_write(can_id, ®s->mb[FLEXCAN_TX_BUF_ID].can_id); - flexcan_write(ctrl, ®s->mb[FLEXCAN_TX_BUF_ID].can_ctrl); + flexcan_write(can_id, &priv->tx_mb->can_id); + flexcan_write(ctrl, &priv->tx_mb->can_ctrl); /* Errata ERR005829 step8: * Write twice INACTIVE(0x8) code to first MB. */ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, - ®s->mb[FLEXCAN_TX_BUF_RESERVED].can_ctrl); + &priv->tx_mb_reserved->can_ctrl); flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, - ®s->mb[FLEXCAN_TX_BUF_RESERVED].can_ctrl); + &priv->tx_mb_reserved->can_ctrl); return NETDEV_TX_OK; } -static void do_bus_err(struct net_device *dev, - struct can_frame *cf, u32 reg_esr) +static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) { struct flexcan_priv *priv = netdev_priv(dev); - int rx_errors = 0, tx_errors = 0; + struct sk_buff *skb; + struct can_frame *cf; + bool rx_errors = false, tx_errors = false; + + skb = alloc_can_err_skb(dev, &cf); + if (unlikely(!skb)) + return; cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; if (reg_esr & FLEXCAN_ESR_BIT1_ERR) { netdev_dbg(dev, "BIT1_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_BIT1; - tx_errors = 1; + tx_errors = true; } if (reg_esr & FLEXCAN_ESR_BIT0_ERR) { netdev_dbg(dev, "BIT0_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_BIT0; - tx_errors = 1; + tx_errors = true; } if (reg_esr & FLEXCAN_ESR_ACK_ERR) { netdev_dbg(dev, "ACK_ERR irq\n"); cf->can_id |= CAN_ERR_ACK; cf->data[3] = CAN_ERR_PROT_LOC_ACK; - tx_errors = 1; + tx_errors = true; } if (reg_esr & FLEXCAN_ESR_CRC_ERR) { netdev_dbg(dev, "CRC_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_BIT; cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; - rx_errors = 1; + rx_errors = true; } if (reg_esr & FLEXCAN_ESR_FRM_ERR) { netdev_dbg(dev, "FRM_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_FORM; - rx_errors = 1; + rx_errors = true; } if (reg_esr & FLEXCAN_ESR_STF_ERR) { netdev_dbg(dev, "STF_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_STUFF; - rx_errors = 1; + rx_errors = true; } priv->can.can_stats.bus_error++; @@ -560,32 +577,16 @@ static void do_bus_err(struct net_device *dev, dev->stats.rx_errors++; if (tx_errors) dev->stats.tx_errors++; -} - -static int flexcan_poll_bus_err(struct net_device *dev, u32 reg_esr) -{ - struct sk_buff *skb; - struct can_frame *cf; - - skb = alloc_can_err_skb(dev, &cf); - if (unlikely(!skb)) - return 0; - - do_bus_err(dev, cf, reg_esr); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += cf->can_dlc; - netif_receive_skb(skb); - return 1; + can_rx_offload_irq_queue_err_skb(&priv->offload, skb); } -static int flexcan_poll_state(struct net_device *dev, u32 reg_esr) +static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) { struct flexcan_priv *priv = netdev_priv(dev); struct sk_buff *skb; struct can_frame *cf; - enum can_state new_state = 0, rx_state = 0, tx_state = 0; + enum can_state new_state, rx_state, tx_state; int flt; struct can_berr_counter bec; @@ -606,33 +607,63 @@ static int flexcan_poll_state(struct net_device *dev, u32 reg_esr) /* state hasn't changed */ if (likely(new_state == priv->can.state)) - return 0; + return; skb = alloc_can_err_skb(dev, &cf); if (unlikely(!skb)) - return 0; + return; can_change_state(dev, cf, tx_state, rx_state); if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - dev->stats.rx_packets++; - dev->stats.rx_bytes += cf->can_dlc; - netif_receive_skb(skb); + can_rx_offload_irq_queue_err_skb(&priv->offload, skb); +} - return 1; +static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) +{ + return container_of(offload, struct flexcan_priv, offload); } -static void flexcan_read_fifo(const struct net_device *dev, - struct can_frame *cf) +static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload, + struct can_frame *cf, + u32 *timestamp, unsigned int n) { - const struct flexcan_priv *priv = netdev_priv(dev); + struct flexcan_priv *priv = rx_offload_to_priv(offload); struct flexcan_regs __iomem *regs = priv->regs; - struct flexcan_mb __iomem *mb = ®s->mb[0]; - u32 reg_ctrl, reg_id; + struct flexcan_mb __iomem *mb = ®s->mb[n]; + u32 reg_ctrl, reg_id, reg_iflag1; + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + u32 code; + + do { + reg_ctrl = flexcan_read(&mb->can_ctrl); + } while (reg_ctrl & FLEXCAN_MB_CODE_RX_BUSY_BIT); + + /* is this MB empty? */ + code = reg_ctrl & FLEXCAN_MB_CODE_MASK; + if ((code != FLEXCAN_MB_CODE_RX_FULL) && + (code != FLEXCAN_MB_CODE_RX_OVERRUN)) + return 0; + + if (code == FLEXCAN_MB_CODE_RX_OVERRUN) { + /* This MB was overrun, we lost data */ + offload->dev->stats.rx_over_errors++; + offload->dev->stats.rx_errors++; + } + } else { + reg_iflag1 = flexcan_read(®s->iflag1); + if (!(reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE)) + return 0; + + reg_ctrl = flexcan_read(&mb->can_ctrl); + } + + /* increase timstamp to full 32 bit */ + *timestamp = reg_ctrl << 16; - reg_ctrl = flexcan_read(&mb->can_ctrl); reg_id = flexcan_read(&mb->can_id); if (reg_ctrl & FLEXCAN_MB_CNT_IDE) cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; @@ -647,69 +678,31 @@ static void flexcan_read_fifo(const struct net_device *dev, *(__be32 *)(cf->data + 4) = cpu_to_be32(flexcan_read(&mb->data[1])); /* mark as read */ - flexcan_write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->iflag1); - flexcan_read(®s->timer); -} - -static int flexcan_read_frame(struct net_device *dev) -{ - struct net_device_stats *stats = &dev->stats; - struct can_frame *cf; - struct sk_buff *skb; - - skb = alloc_can_skb(dev, &cf); - if (unlikely(!skb)) { - stats->rx_dropped++; - return 0; + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + /* Clear IRQ */ + if (n < 32) + flexcan_write(BIT(n), ®s->iflag1); + else + flexcan_write(BIT(n - 32), ®s->iflag2); + } else { + flexcan_write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->iflag1); + flexcan_read(®s->timer); } - flexcan_read_fifo(dev, cf); - - stats->rx_packets++; - stats->rx_bytes += cf->can_dlc; - netif_receive_skb(skb); - - can_led_event(dev, CAN_LED_EVENT_RX); - return 1; } -static int flexcan_poll(struct napi_struct *napi, int quota) + +static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv) { - struct net_device *dev = napi->dev; - const struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->regs; - u32 reg_iflag1, reg_esr; - int work_done = 0; - - /* The error bits are cleared on read, - * use saved value from irq handler. - */ - reg_esr = flexcan_read(®s->esr) | priv->reg_esr; - - /* handle state changes */ - work_done += flexcan_poll_state(dev, reg_esr); + u32 iflag1, iflag2; - /* handle RX-FIFO */ - reg_iflag1 = flexcan_read(®s->iflag1); - while (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE && - work_done < quota) { - work_done += flexcan_read_frame(dev); - reg_iflag1 = flexcan_read(®s->iflag1); - } - - /* report bus errors */ - if (flexcan_has_and_handle_berr(priv, reg_esr) && work_done < quota) - work_done += flexcan_poll_bus_err(dev, reg_esr); + iflag2 = flexcan_read(®s->iflag2) & priv->reg_imask2_default; + iflag1 = flexcan_read(®s->iflag1) & priv->reg_imask1_default & + ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx); - if (work_done < quota) { - napi_complete_done(napi, work_done); - /* enable IRQs */ - flexcan_write(FLEXCAN_IFLAG_DEFAULT, ®s->imask1); - flexcan_write(priv->reg_ctrl_default, ®s->ctrl); - } - - return work_done; + return (u64)iflag2 << 32 | iflag1; } static irqreturn_t flexcan_irq(int irq, void *dev_id) @@ -718,55 +711,70 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) struct net_device_stats *stats = &dev->stats; struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->regs; + irqreturn_t handled = IRQ_NONE; u32 reg_iflag1, reg_esr; reg_iflag1 = flexcan_read(®s->iflag1); - reg_esr = flexcan_read(®s->esr); - /* ACK all bus error and state change IRQ sources */ - if (reg_esr & FLEXCAN_ESR_ALL_INT) - flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); - - /* schedule NAPI in case of: - * - rx IRQ - * - state change IRQ - * - bus error IRQ and bus error reporting is activated - */ - if ((reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) || - (reg_esr & FLEXCAN_ESR_ERR_STATE) || - flexcan_has_and_handle_berr(priv, reg_esr)) { - /* The error bits are cleared on read, - * save them for later use. - */ - priv->reg_esr = reg_esr & FLEXCAN_ESR_ERR_BUS; - flexcan_write(FLEXCAN_IFLAG_DEFAULT & - ~FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->imask1); - flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, - ®s->ctrl); - napi_schedule(&priv->napi); - } + /* reception interrupt */ + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + u64 reg_iflag; + int ret; + + while ((reg_iflag = flexcan_read_reg_iflag_rx(priv))) { + handled = IRQ_HANDLED; + ret = can_rx_offload_irq_offload_timestamp(&priv->offload, + reg_iflag); + if (!ret) + break; + } + } else { + if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) { + handled = IRQ_HANDLED; + can_rx_offload_irq_offload_fifo(&priv->offload); + } - /* FIFO overflow */ - if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) { - flexcan_write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, ®s->iflag1); - dev->stats.rx_over_errors++; - dev->stats.rx_errors++; + /* FIFO overflow interrupt */ + if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) { + handled = IRQ_HANDLED; + flexcan_write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, ®s->iflag1); + dev->stats.rx_over_errors++; + dev->stats.rx_errors++; + } } /* transmission complete interrupt */ - if (reg_iflag1 & (1 << FLEXCAN_TX_BUF_ID)) { + if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { + handled = IRQ_HANDLED; stats->tx_bytes += can_get_echo_skb(dev, 0); stats->tx_packets++; can_led_event(dev, CAN_LED_EVENT_TX); /* after sending a RTR frame MB is in RX mode */ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, - ®s->mb[FLEXCAN_TX_BUF_ID].can_ctrl); - flexcan_write((1 << FLEXCAN_TX_BUF_ID), ®s->iflag1); + &priv->tx_mb->can_ctrl); + flexcan_write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag1); netif_wake_queue(dev); } - return IRQ_HANDLED; + reg_esr = flexcan_read(®s->esr); + + /* ACK all bus error and state change IRQ sources */ + if (reg_esr & FLEXCAN_ESR_ALL_INT) { + handled = IRQ_HANDLED; + flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); + } + + /* state change interrupt */ + if (reg_esr & FLEXCAN_ESR_ERR_STATE) + flexcan_irq_state(dev, reg_esr); + + /* bus error IRQ - handle if bus error reporting is activated */ + if ((reg_esr & FLEXCAN_ESR_ERR_BUS) && + (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) + flexcan_irq_bus_err(dev, reg_esr); + + return handled; } static void flexcan_set_bittiming(struct net_device *dev) @@ -839,14 +847,23 @@ static int flexcan_chip_start(struct net_device *dev) * only supervisor access * enable warning int * disable local echo + * enable individual RX masking * choose format C * set max mailbox number */ reg_mcr = flexcan_read(®s->mcr); reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); - reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_FEN | FLEXCAN_MCR_HALT | - FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | - FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(FLEXCAN_TX_BUF_ID); + reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | + FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ | + FLEXCAN_MCR_IDAM_C; + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + reg_mcr &= ~FLEXCAN_MCR_FEN; + reg_mcr |= FLEXCAN_MCR_MAXMB(priv->offload.mb_last); + } else { + reg_mcr |= FLEXCAN_MCR_FEN | + FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); + } netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr); flexcan_write(reg_mcr, ®s->mcr); @@ -883,19 +900,31 @@ static int flexcan_chip_start(struct net_device *dev) netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl); flexcan_write(reg_ctrl, ®s->ctrl); + if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) { + reg_ctrl2 = flexcan_read(®s->ctrl2); + reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS; + flexcan_write(reg_ctrl2, ®s->ctrl2); + } + /* clear and invalidate all mailboxes first */ - for (i = FLEXCAN_TX_BUF_ID; i < ARRAY_SIZE(regs->mb); i++) { + for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) { flexcan_write(FLEXCAN_MB_CODE_RX_INACTIVE, ®s->mb[i].can_ctrl); } + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) + flexcan_write(FLEXCAN_MB_CODE_RX_EMPTY, + ®s->mb[i].can_ctrl); + } + /* Errata ERR005829: mark first TX mailbox as INACTIVE */ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, - ®s->mb[FLEXCAN_TX_BUF_RESERVED].can_ctrl); + &priv->tx_mb_reserved->can_ctrl); /* mark TX mailbox as INACTIVE */ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, - ®s->mb[FLEXCAN_TX_BUF_ID].can_ctrl); + &priv->tx_mb->can_ctrl); /* acceptance mask/acceptance code (accept everything) */ flexcan_write(0x0, ®s->rxgmask); @@ -905,6 +934,10 @@ static int flexcan_chip_start(struct net_device *dev) if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG) flexcan_write(0x0, ®s->rxfgmask); + /* clear acceptance filters */ + for (i = 0; i < ARRAY_SIZE(regs->mb); i++) + flexcan_write(0, ®s->rximr[i]); + /* On Vybrid, disable memory error detection interrupts * and freeze mode. * This also works around errata e5295 which generates @@ -942,7 +975,8 @@ static int flexcan_chip_start(struct net_device *dev) /* enable interrupts atomically */ disable_irq(dev->irq); flexcan_write(priv->reg_ctrl_default, ®s->ctrl); - flexcan_write(FLEXCAN_IFLAG_DEFAULT, ®s->imask1); + flexcan_write(priv->reg_imask1_default, ®s->imask1); + flexcan_write(priv->reg_imask2_default, ®s->imask2); enable_irq(dev->irq); /* print chip status */ @@ -972,6 +1006,7 @@ static void flexcan_chip_stop(struct net_device *dev) flexcan_chip_disable(priv); /* Disable all interrupts */ + flexcan_write(0, ®s->imask2); flexcan_write(0, ®s->imask1); flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, ®s->ctrl); @@ -1008,7 +1043,7 @@ static int flexcan_open(struct net_device *dev) can_led_event(dev, CAN_LED_EVENT_OPEN); - napi_enable(&priv->napi); + can_rx_offload_enable(&priv->offload); netif_start_queue(dev); return 0; @@ -1030,7 +1065,7 @@ static int flexcan_close(struct net_device *dev) struct flexcan_priv *priv = netdev_priv(dev); netif_stop_queue(dev); - napi_disable(&priv->napi); + can_rx_offload_disable(&priv->offload); flexcan_chip_stop(dev); free_irq(dev->irq, dev); @@ -1104,8 +1139,9 @@ static int register_flexcandev(struct net_device *dev) flexcan_write(reg, ®s->mcr); /* Currently we only support newer versions of this core - * featuring a RX FIFO. Older cores found on some Coldfire - * derivates are not yet supported. + * featuring a RX hardware FIFO (although this driver doesn't + * make use of it on some cores). Older cores, found on some + * Coldfire derivates are not tested. */ reg = flexcan_read(®s->mcr); if (!(reg & FLEXCAN_MCR_FEN)) { @@ -1208,6 +1244,9 @@ static int flexcan_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; + platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + dev->netdev_ops = &flexcan_netdev_ops; dev->irq = irq; dev->flags |= IFF_ECHO; @@ -1223,14 +1262,41 @@ static int flexcan_probe(struct platform_device *pdev) priv->regs = regs; priv->clk_ipg = clk_ipg; priv->clk_per = clk_per; - priv->pdata = dev_get_platdata(&pdev->dev); priv->devtype_data = devtype_data; priv->reg_xceiver = reg_xceiver; - netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT); + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP; + priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP]; + } else { + priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO; + priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO]; + } + priv->tx_mb = ®s->mb[priv->tx_mb_idx]; - platform_set_drvdata(pdev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); + priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); + priv->reg_imask2_default = 0; + + priv->offload.mailbox_read = flexcan_mailbox_read; + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + u64 imask; + + priv->offload.mb_first = FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST; + priv->offload.mb_last = FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST; + + imask = GENMASK_ULL(priv->offload.mb_last, priv->offload.mb_first); + priv->reg_imask1_default |= imask; + priv->reg_imask2_default |= imask >> 32; + + err = can_rx_offload_add_timestamp(dev, &priv->offload); + } else { + priv->reg_imask1_default |= FLEXCAN_IFLAG_RX_FIFO_OVERFLOW | + FLEXCAN_IFLAG_RX_FIFO_AVAILABLE; + err = can_rx_offload_add_fifo(dev, &priv->offload, FLEXCAN_NAPI_WEIGHT); + } + if (err) + goto failed_offload; err = register_flexcandev(dev); if (err) { @@ -1245,6 +1311,7 @@ static int flexcan_probe(struct platform_device *pdev) return 0; + failed_offload: failed_register: free_candev(dev); return err; @@ -1256,7 +1323,7 @@ static int flexcan_remove(struct platform_device *pdev) struct flexcan_priv *priv = netdev_priv(dev); unregister_flexcandev(dev); - netif_napi_del(&priv->napi); + can_rx_offload_del(&priv->offload); free_candev(dev); return 0; diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c new file mode 100644 index 000000000000..f394f77d7528 --- /dev/null +++ b/drivers/net/can/rx-offload.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2014 David Jander, Protonic Holland + * Copyright (C) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/can/dev.h> +#include <linux/can/rx-offload.h> + +struct can_rx_offload_cb { + u32 timestamp; +}; + +static inline struct can_rx_offload_cb *can_rx_offload_get_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct can_rx_offload_cb) > sizeof(skb->cb)); + + return (struct can_rx_offload_cb *)skb->cb; +} + +static inline bool can_rx_offload_le(struct can_rx_offload *offload, unsigned int a, unsigned int b) +{ + if (offload->inc) + return a <= b; + else + return a >= b; +} + +static inline unsigned int can_rx_offload_inc(struct can_rx_offload *offload, unsigned int *val) +{ + if (offload->inc) + return (*val)++; + else + return (*val)--; +} + +static int can_rx_offload_napi_poll(struct napi_struct *napi, int quota) +{ + struct can_rx_offload *offload = container_of(napi, struct can_rx_offload, napi); + struct net_device *dev = offload->dev; + struct net_device_stats *stats = &dev->stats; + struct sk_buff *skb; + int work_done = 0; + + while ((work_done < quota) && + (skb = skb_dequeue(&offload->skb_queue))) { + struct can_frame *cf = (struct can_frame *)skb->data; + + work_done++; + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); + } + + if (work_done < quota) { + napi_complete_done(napi, work_done); + + /* Check if there was another interrupt */ + if (!skb_queue_empty(&offload->skb_queue)) + napi_reschedule(&offload->napi); + } + + can_led_event(offload->dev, CAN_LED_EVENT_RX); + + return work_done; +} + +static inline void __skb_queue_add_sort(struct sk_buff_head *head, struct sk_buff *new, + int (*compare)(struct sk_buff *a, struct sk_buff *b)) +{ + struct sk_buff *pos, *insert = (struct sk_buff *)head; + + skb_queue_reverse_walk(head, pos) { + const struct can_rx_offload_cb *cb_pos, *cb_new; + + cb_pos = can_rx_offload_get_cb(pos); + cb_new = can_rx_offload_get_cb(new); + + netdev_dbg(new->dev, + "%s: pos=0x%08x, new=0x%08x, diff=%10d, queue_len=%d\n", + __func__, + cb_pos->timestamp, cb_new->timestamp, + cb_new->timestamp - cb_pos->timestamp, + skb_queue_len(head)); + + if (compare(pos, new) < 0) + continue; + insert = pos; + break; + } + + __skb_queue_after(head, insert, new); +} + +static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b) +{ + const struct can_rx_offload_cb *cb_a, *cb_b; + + cb_a = can_rx_offload_get_cb(a); + cb_b = can_rx_offload_get_cb(b); + + /* Substract two u32 and return result as int, to keep + * difference steady around the u32 overflow. + */ + return cb_b->timestamp - cb_a->timestamp; +} + +static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) +{ + struct sk_buff *skb = NULL; + struct can_rx_offload_cb *cb; + struct can_frame *cf; + int ret; + + /* If queue is full or skb not available, read to discard mailbox */ + if (likely(skb_queue_len(&offload->skb_queue) <= + offload->skb_queue_len_max)) + skb = alloc_can_skb(offload->dev, &cf); + + if (!skb) { + struct can_frame cf_overflow; + u32 timestamp; + + ret = offload->mailbox_read(offload, &cf_overflow, + ×tamp, n); + if (ret) + offload->dev->stats.rx_dropped++; + + return NULL; + } + + cb = can_rx_offload_get_cb(skb); + ret = offload->mailbox_read(offload, cf, &cb->timestamp, n); + if (!ret) { + kfree_skb(skb); + return NULL; + } + + return skb; +} + +int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pending) +{ + struct sk_buff_head skb_queue; + unsigned int i; + + __skb_queue_head_init(&skb_queue); + + for (i = offload->mb_first; + can_rx_offload_le(offload, i, offload->mb_last); + can_rx_offload_inc(offload, &i)) { + struct sk_buff *skb; + + if (!(pending & BIT_ULL(i))) + continue; + + skb = can_rx_offload_offload_one(offload, i); + if (!skb) + break; + + __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); + } + + if (!skb_queue_empty(&skb_queue)) { + unsigned long flags; + u32 queue_len; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + skb_queue_splice_tail(&skb_queue, &offload->skb_queue); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + if ((queue_len = skb_queue_len(&offload->skb_queue)) > + (offload->skb_queue_len_max / 8)) + netdev_dbg(offload->dev, "%s: queue_len=%d\n", + __func__, queue_len); + + can_rx_offload_schedule(offload); + } + + return skb_queue_len(&skb_queue); +} +EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_timestamp); + +int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) +{ + struct sk_buff *skb; + int received = 0; + + while ((skb = can_rx_offload_offload_one(offload, 0))) { + skb_queue_tail(&offload->skb_queue, skb); + received++; + } + + if (received) + can_rx_offload_schedule(offload); + + return received; +} +EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); + +int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) +{ + if (skb_queue_len(&offload->skb_queue) > + offload->skb_queue_len_max) + return -ENOMEM; + + skb_queue_tail(&offload->skb_queue, skb); + can_rx_offload_schedule(offload); + + return 0; +} +EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb); + +static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) +{ + offload->dev = dev; + + /* Limit queue len to 4x the weight (rounted to next power of two) */ + offload->skb_queue_len_max = 2 << fls(weight); + offload->skb_queue_len_max *= 4; + skb_queue_head_init(&offload->skb_queue); + + can_rx_offload_reset(offload); + netif_napi_add(dev, &offload->napi, can_rx_offload_napi_poll, weight); + + dev_dbg(dev->dev.parent, "%s: skb_queue_len_max=%d\n", + __func__, offload->skb_queue_len_max); + + return 0; +} + +int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload) +{ + unsigned int weight; + + if (offload->mb_first > BITS_PER_LONG_LONG || + offload->mb_last > BITS_PER_LONG_LONG || !offload->mailbox_read) + return -EINVAL; + + if (offload->mb_first < offload->mb_last) { + offload->inc = true; + weight = offload->mb_last - offload->mb_first; + } else { + offload->inc = false; + weight = offload->mb_first - offload->mb_last; + } + + return can_rx_offload_init_queue(dev, offload, weight);; +} +EXPORT_SYMBOL_GPL(can_rx_offload_add_timestamp); + +int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) +{ + if (!offload->mailbox_read) + return -EINVAL; + + return can_rx_offload_init_queue(dev, offload, weight); +} +EXPORT_SYMBOL_GPL(can_rx_offload_add_fifo); + +void can_rx_offload_enable(struct can_rx_offload *offload) +{ + can_rx_offload_reset(offload); + napi_enable(&offload->napi); +} +EXPORT_SYMBOL_GPL(can_rx_offload_enable); + +void can_rx_offload_del(struct can_rx_offload *offload) +{ + netif_napi_del(&offload->napi); + skb_queue_purge(&offload->skb_queue); +} +EXPORT_SYMBOL_GPL(can_rx_offload_del); + +void can_rx_offload_reset(struct can_rx_offload *offload) +{ +} +EXPORT_SYMBOL_GPL(can_rx_offload_reset); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 22ce57256d34..7b4e40b286e4 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -677,11 +677,6 @@ static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, return err; } -static bool mv88e6xxx_6095_family(struct mv88e6xxx_chip *chip) -{ - return chip->info->family == MV88E6XXX_FAMILY_6095; -} - static bool mv88e6xxx_6097_family(struct mv88e6xxx_chip *chip) { return chip->info->family == MV88E6XXX_FAMILY_6097; @@ -692,11 +687,6 @@ static bool mv88e6xxx_6165_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6165; } -static bool mv88e6xxx_6185_family(struct mv88e6xxx_chip *chip) -{ - return chip->info->family == MV88E6XXX_FAMILY_6185; -} - static bool mv88e6xxx_6320_family(struct mv88e6xxx_chip *chip) { return chip->info->family == MV88E6XXX_FAMILY_6320; @@ -749,6 +739,12 @@ static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, goto restore_link; } + if (chip->info->ops->port_set_cmode) { + err = chip->info->ops->port_set_cmode(chip, port, mode); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + err = 0; restore_link: if (chip->info->ops->port_set_link(chip, port, link)) @@ -2579,31 +2575,23 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * received packets as usual, disable ARP mirroring and don't send a * copy of all transmitted/received frames on this port to the CPU. */ - reg = 0; - if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6095_family(chip) || mv88e6xxx_6320_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6341_family(chip)) - reg = PORT_CONTROL_2_MAP_DA; - - if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip)) { - /* Set the upstream port this port should use */ - reg |= dsa_upstream_port(ds); - /* enable forwarding of unknown multicast addresses to - * the upstream port - */ - if (port == dsa_upstream_port(ds)) - reg |= PORT_CONTROL_2_FORWARD_UNKNOWN; - } - - reg |= PORT_CONTROL_2_8021Q_DISABLED; + err = mv88e6xxx_port_set_map_da(chip, port); + if (err) + return err; - if (reg) { - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + reg = 0; + if (chip->info->ops->port_set_upstream_port) { + err = chip->info->ops->port_set_upstream_port( + chip, port, dsa_upstream_port(ds)); if (err) return err; } + err = mv88e6xxx_port_set_8021q_mode(chip, port, + PORT_CONTROL_2_8021Q_DISABLED); + if (err) + return err; + if (chip->info->ops->port_jumbo_config) { err = chip->info->ops->port_jumbo_config(chip, port); if (err) @@ -3138,7 +3126,8 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, .port_set_frame_mode = mv88e6085_port_set_frame_mode, - .port_set_egress_unknowns = mv88e6085_port_set_egress_unknowns, + .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns, + .port_set_upstream_port = mv88e6095_port_set_upstream_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, @@ -3204,8 +3193,9 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .port_set_speed = mv88e6185_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, - .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns, + .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns, .port_set_ether_type = mv88e6351_port_set_ether_type, + .port_set_upstream_port = mv88e6095_port_set_upstream_port, .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_config = mv88e6097_port_pause_config, @@ -3381,8 +3371,9 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, .port_set_frame_mode = mv88e6085_port_set_frame_mode, - .port_set_egress_unknowns = mv88e6085_port_set_egress_unknowns, + .port_set_egress_unknowns = mv88e6095_port_set_egress_unknowns, .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting, + .port_set_upstream_port = mv88e6095_port_set_upstream_port, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, @@ -3520,6 +3511,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns, .port_set_ether_type = mv88e6351_port_set_ether_type, .port_pause_config = mv88e6390_port_pause_config, + .port_set_cmode = mv88e6390x_port_set_cmode, .stats_snapshot = mv88e6390_g1_stats_snapshot, .stats_set_histogram = mv88e6390_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, @@ -3738,6 +3730,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_config = mv88e6390_port_pause_config, + .port_set_cmode = mv88e6390x_port_set_cmode, .stats_snapshot = mv88e6390_g1_stats_snapshot, .stats_set_histogram = mv88e6390_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 353e26bea3c3..50e4e0be4227 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -501,18 +501,110 @@ static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) return mv88e6xxx_g2_smi_phy_wait(chip); } +static int mv88e6xxx_g2_smi_phy_write_addr(struct mv88e6xxx_chip *chip, + int addr, int device, int reg, + bool external) +{ + int cmd = SMI_CMD_OP_45_WRITE_ADDR | (addr << 5) | device; + int err; + + if (external) + cmd |= GLOBAL2_SMI_PHY_CMD_EXTERNAL; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, reg); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); +} + +int mv88e6xxx_g2_smi_phy_read_c45(struct mv88e6xxx_chip *chip, int addr, + int reg_c45, u16 *val, bool external) +{ + int device = (reg_c45 >> 16) & 0x1f; + int reg = reg_c45 & 0xffff; + int err; + u16 cmd; + + err = mv88e6xxx_g2_smi_phy_write_addr(chip, addr, device, reg, + external); + if (err) + return err; + + cmd = GLOBAL2_SMI_PHY_CMD_OP_45_READ_DATA | (addr << 5) | device; + + if (external) + cmd |= GLOBAL2_SMI_PHY_CMD_EXTERNAL; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + err = mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); + if (err) + return err; + + err = *val; + + return 0; +} + +int mv88e6xxx_g2_smi_phy_read_c22(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 *val, bool external) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; + int err; + + if (external) + cmd |= GLOBAL2_SMI_PHY_CMD_EXTERNAL; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); +} + int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int addr, int reg, u16 *val) { - u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + bool external = mdio_bus->external; + + if (reg & MII_ADDR_C45) + return mv88e6xxx_g2_smi_phy_read_c45(chip, addr, reg, val, + external); + return mv88e6xxx_g2_smi_phy_read_c22(chip, addr, reg, val, external); +} + +int mv88e6xxx_g2_smi_phy_write_c45(struct mv88e6xxx_chip *chip, int addr, + int reg_c45, u16 val, bool external) +{ + int device = (reg_c45 >> 16) & 0x1f; + int reg = reg_c45 & 0xffff; int err; + u16 cmd; - if (mdio_bus->external) + err = mv88e6xxx_g2_smi_phy_write_addr(chip, addr, device, reg, + external); + if (err) + return err; + + cmd = GLOBAL2_SMI_PHY_CMD_OP_45_WRITE_DATA | (addr << 5) | device; + + if (external) cmd |= GLOBAL2_SMI_PHY_CMD_EXTERNAL; - err = mv88e6xxx_g2_smi_phy_wait(chip); + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, val); if (err) return err; @@ -520,18 +612,16 @@ int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, if (err) return err; - return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); + return 0; } -int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, - struct mii_bus *bus, - int addr, int reg, u16 val) +int mv88e6xxx_g2_smi_phy_write_c22(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 val, bool external) { u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; - struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; int err; - if (mdio_bus->external) + if (external) cmd |= GLOBAL2_SMI_PHY_CMD_EXTERNAL; err = mv88e6xxx_g2_smi_phy_wait(chip); @@ -545,6 +635,20 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); } +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int reg, u16 val) +{ + struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + bool external = mdio_bus->external; + + if (reg & MII_ADDR_C45) + return mv88e6xxx_g2_smi_phy_write_c45(chip, addr, reg, val, + external); + + return mv88e6xxx_g2_smi_phy_write_c22(chip, addr, reg, val, external); +} + static void mv88e6xxx_g2_irq_mask(struct irq_data *d) { struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 9c5c0472b211..8a21800374f3 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -58,6 +58,9 @@ #define PORT_STATUS_CMODE_100BASE_X 0x8 #define PORT_STATUS_CMODE_1000BASE_X 0x9 #define PORT_STATUS_CMODE_SGMII 0xa +#define PORT_STATUS_CMODE_2500BASEX 0xb +#define PORT_STATUS_CMODE_XAUI 0xc +#define PORT_STATUS_CMODE_RXAUI 0xd #define PORT_PCS_CTRL 0x01 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK BIT(15) #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK BIT(14) @@ -165,6 +168,7 @@ #define PORT_CONTROL_2_FORWARD_UNKNOWN BIT(6) #define PORT_CONTROL_2_EGRESS_MONITOR BIT(5) #define PORT_CONTROL_2_INGRESS_MONITOR BIT(4) +#define PORT_CONTROL_2_UPSTREAM_MASK 0x0f #define PORT_RATE_CONTROL 0x09 #define PORT_RATE_CONTROL_2 0x0a #define PORT_ASSOC_VECTOR 0x0b @@ -397,6 +401,13 @@ #define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA ((0x2 << 10) | \ GLOBAL2_SMI_PHY_CMD_MODE_22 | \ GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_CMD_OP_45_WRITE_ADDR ((0x0 << 10) | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_CMD_OP_45_WRITE_DATA ((0x1 << 10) | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_CMD_OP_45_READ_DATA ((0x3 << 10) | \ + GLOBAL2_SMI_PHY_CMD_BUSY) + #define GLOBAL2_SMI_PHY_DATA 0x19 #define GLOBAL2_SCRATCH_MISC 0x1a #define GLOBAL2_SCRATCH_BUSY BIT(15) @@ -838,6 +849,18 @@ struct mv88e6xxx_ops { int (*port_egress_rate_limiting)(struct mv88e6xxx_chip *chip, int port); int (*port_pause_config)(struct mv88e6xxx_chip *chip, int port); + /* CMODE control what PHY mode the MAC will use, eg. SGMII, RGMII, etc. + * Some chips allow this to be configured on specific ports. + */ + int (*port_set_cmode)(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); + + /* Some devices have a per port register indicating what is + * the upstream port this port should forward to. + */ + int (*port_set_upstream_port)(struct mv88e6xxx_chip *chip, int port, + int upstream_port); + /* Snapshot the statistics for a port. The statistics can then * be read back a leisure but still with a consistent view. */ diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index d380a93b092c..8875784c4718 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -11,6 +11,7 @@ * (at your option) any later version. */ +#include <linux/phy.h> #include "mv88e6xxx.h" #include "port.h" @@ -304,6 +305,69 @@ int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return mv88e6xxx_port_set_speed(chip, port, speed, true, true); } +int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + u16 reg; + u16 cmode; + int err; + + if (mode == PHY_INTERFACE_MODE_NA) + return 0; + + if (port != 9 && port != 10) + return -EOPNOTSUPP; + + switch (mode) { + case PHY_INTERFACE_MODE_1000BASEX: + cmode = PORT_STATUS_CMODE_1000BASE_X; + break; + case PHY_INTERFACE_MODE_SGMII: + cmode = PORT_STATUS_CMODE_SGMII; + break; + case PHY_INTERFACE_MODE_2500BASEX: + cmode = PORT_STATUS_CMODE_2500BASEX; + break; + case PHY_INTERFACE_MODE_XGMII: + cmode = PORT_STATUS_CMODE_XAUI; + break; + case PHY_INTERFACE_MODE_RXAUI: + cmode = PORT_STATUS_CMODE_RXAUI; + break; + default: + cmode = 0; + } + + if (cmode) { + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) + return err; + + reg &= ~PORT_STATUS_CMODE_MASK; + reg |= cmode; + + err = mv88e6xxx_port_write(chip, port, PORT_STATUS, reg); + if (err) + return err; + } + + return 0; +} + +int mv88e6xxx_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode) +{ + int err; + u16 reg; + + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) + return err; + + *cmode = reg & PORT_STATUS_CMODE_MASK; + + return 0; +} + /* Offset 0x02: Pause Control * * Do not limit the period of time that this port can be paused for by @@ -608,6 +672,40 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { [PORT_CONTROL_2_8021Q_SECURE] = "Secure", }; +int mv88e6095_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port, + bool on) +{ + int err; + u16 reg; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) + return err; + + if (on) + reg |= PORT_CONTROL_2_FORWARD_UNKNOWN; + else + reg &= ~PORT_CONTROL_2_FORWARD_UNKNOWN; + + return mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); +} + +int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, + int upstream_port) +{ + int err; + u16 reg; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) + return err; + + reg &= ~PORT_CONTROL_2_UPSTREAM_MASK; + reg |= upstream_port; + + return mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); +} + int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, u16 mode) { @@ -631,6 +729,20 @@ int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, return 0; } +int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) + return err; + + reg |= PORT_CONTROL_2_MAP_DA; + + return mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); +} + int mv88e6165_port_jumbo_config(struct mv88e6xxx_chip *chip, int port) { u16 reg; diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 7b3bacaacbfe..c83cbb3f4491 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -58,6 +58,8 @@ int mv88e6351_port_set_frame_mode(struct mv88e6xxx_chip *chip, int port, enum mv88e6xxx_frame_mode mode); int mv88e6085_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port, bool on); +int mv88e6095_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port, + bool on); int mv88e6351_port_set_egress_unknowns(struct mv88e6xxx_chip *chip, int port, bool on); int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port, @@ -67,5 +69,10 @@ int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port); int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port); int mv88e6097_port_pause_config(struct mv88e6xxx_chip *chip, int port); int mv88e6390_port_pause_config(struct mv88e6xxx_chip *chip, int port); - +int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); +int mv88e6xxx_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode); +int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port); +int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, + int upstream_port); #endif /* _MV88E6XXX_PORT_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index 6082ed1b5ea0..a7ca45b251cb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o +bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aff3dc114a5b..cda1c787e8e1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -33,6 +33,7 @@ #include <linux/if.h> #include <linux/if_vlan.h> #include <linux/rtc.h> +#include <linux/bpf.h> #include <net/ip.h> #include <net/tcp.h> #include <net/udp.h> @@ -53,6 +54,7 @@ #include "bnxt_sriov.h" #include "bnxt_ethtool.h" #include "bnxt_dcb.h" +#include "bnxt_xdp.h" #define BNXT_TX_TIMEOUT (5 * HZ) @@ -210,16 +212,7 @@ static bool bnxt_vf_pciid(enum board_idx idx) #define BNXT_CP_DB_IRQ_DIS(db) \ writel(DB_CP_IRQ_DIS_FLAGS, db) -static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr) -{ - /* Tell compiler to fetch tx indices from memory. */ - barrier(); - - return bp->tx_ring_size - - ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask); -} - -static const u16 bnxt_lhint_arr[] = { +const u16 bnxt_lhint_arr[] = { TX_BD_FLAGS_LHINT_512_AND_SMALLER, TX_BD_FLAGS_LHINT_512_TO_1023, TX_BD_FLAGS_LHINT_1024_TO_2047, @@ -262,8 +255,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - txr = &bp->tx_ring[i]; txq = netdev_get_tx_queue(dev, i); + txr = &bp->tx_ring[bp->tx_ring_map[i]]; prod = txr->tx_prod; free_size = bnxt_tx_avail(bp, txr); @@ -509,8 +502,7 @@ tx_dma_error: static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; - int index = txr - &bp->tx_ring[0]; - struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, index); + struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); u16 cons = txr->tx_cons; struct pci_dev *pdev = bp->pdev; int i; @@ -573,6 +565,25 @@ next_tx_int: } } +static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, + gfp_t gfp) +{ + struct device *dev = &bp->pdev->dev; + struct page *page; + + page = alloc_page(gfp); + if (!page) + return NULL; + + *mapping = dma_map_page(dev, page, 0, PAGE_SIZE, bp->rx_dir); + if (dma_mapping_error(dev, *mapping)) { + __free_page(page); + return NULL; + } + *mapping += bp->rx_dma_offset; + return page; +} + static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping, gfp_t gfp) { @@ -583,8 +594,8 @@ static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping, if (!data) return NULL; - *mapping = dma_map_single(&pdev->dev, data + BNXT_RX_DMA_OFFSET, - bp->rx_buf_use_size, PCI_DMA_FROMDEVICE); + *mapping = dma_map_single(&pdev->dev, data + bp->rx_dma_offset, + bp->rx_buf_use_size, bp->rx_dir); if (dma_mapping_error(&pdev->dev, *mapping)) { kfree(data); @@ -593,29 +604,37 @@ static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping, return data; } -static inline int bnxt_alloc_rx_data(struct bnxt *bp, - struct bnxt_rx_ring_info *rxr, - u16 prod, gfp_t gfp) +int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + u16 prod, gfp_t gfp) { struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod]; - u8 *data; dma_addr_t mapping; - data = __bnxt_alloc_rx_data(bp, &mapping, gfp); - if (!data) - return -ENOMEM; + if (BNXT_RX_PAGE_MODE(bp)) { + struct page *page = __bnxt_alloc_rx_page(bp, &mapping, gfp); - rx_buf->data = data; - dma_unmap_addr_set(rx_buf, mapping, mapping); + if (!page) + return -ENOMEM; - rxbd->rx_bd_haddr = cpu_to_le64(mapping); + rx_buf->data = page; + rx_buf->data_ptr = page_address(page) + bp->rx_offset; + } else { + u8 *data = __bnxt_alloc_rx_data(bp, &mapping, gfp); + if (!data) + return -ENOMEM; + + rx_buf->data = data; + rx_buf->data_ptr = data + bp->rx_offset; + } + rx_buf->mapping = mapping; + + rxbd->rx_bd_haddr = cpu_to_le64(mapping); return 0; } -static void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, - u8 *data) +void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data) { u16 prod = rxr->rx_prod; struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf; @@ -625,9 +644,9 @@ static void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, cons_rx_buf = &rxr->rx_buf_ring[cons]; prod_rx_buf->data = data; + prod_rx_buf->data_ptr = cons_rx_buf->data_ptr; - dma_unmap_addr_set(prod_rx_buf, mapping, - dma_unmap_addr(cons_rx_buf, mapping)); + prod_rx_buf->mapping = cons_rx_buf->mapping; prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; cons_bd = &rxr->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)]; @@ -753,13 +772,60 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_napi *bnapi, u16 cp_cons, rxr->rx_sw_agg_prod = sw_prod; } +static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr, + u16 cons, void *data, u8 *data_ptr, + dma_addr_t dma_addr, + unsigned int offset_and_len) +{ + unsigned int payload = offset_and_len >> 16; + unsigned int len = offset_and_len & 0xffff; + struct skb_frag_struct *frag; + struct page *page = data; + u16 prod = rxr->rx_prod; + struct sk_buff *skb; + int off, err; + + err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC); + if (unlikely(err)) { + bnxt_reuse_rx_data(rxr, cons, data); + return NULL; + } + dma_addr -= bp->rx_dma_offset; + dma_unmap_page(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir); + + if (unlikely(!payload)) + payload = eth_get_headlen(data_ptr, len); + + skb = napi_alloc_skb(&rxr->bnapi->napi, payload); + if (!skb) { + __free_page(page); + return NULL; + } + + off = (void *)data_ptr - page_address(page); + skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE); + memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN, + payload + NET_IP_ALIGN); + + frag = &skb_shinfo(skb)->frags[0]; + skb_frag_size_sub(frag, payload); + frag->page_offset += payload; + skb->data_len -= payload; + skb->tail += payload; + + return skb; +} + static struct sk_buff *bnxt_rx_skb(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - u16 prod, u8 *data, dma_addr_t dma_addr, - unsigned int len) + void *data, u8 *data_ptr, + dma_addr_t dma_addr, + unsigned int offset_and_len) { - int err; + u16 prod = rxr->rx_prod; struct sk_buff *skb; + int err; err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC); if (unlikely(err)) { @@ -769,14 +835,14 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp, skb = build_skb(data, 0); dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, - PCI_DMA_FROMDEVICE); + bp->rx_dir); if (!skb) { kfree(data); return NULL; } - skb_reserve(skb, BNXT_RX_OFFSET); - skb_put(skb, len); + skb_reserve(skb, bp->rx_offset); + skb_put(skb, offset_and_len & 0xffff); return skb; } @@ -812,7 +878,7 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_napi *bnapi, * a sw_prod index that equals the cons index, so we * need to clear the cons entry now. */ - mapping = dma_unmap_addr(cons_rx_buf, mapping); + mapping = cons_rx_buf->mapping; page = cons_rx_buf->page; cons_rx_buf->page = NULL; @@ -875,14 +941,14 @@ static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, if (!skb) return NULL; - dma_sync_single_for_cpu(&pdev->dev, mapping, - bp->rx_copy_thresh, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copy_thresh, + bp->rx_dir); - memcpy(skb->data - BNXT_RX_OFFSET, data, len + BNXT_RX_OFFSET); + memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN, + len + NET_IP_ALIGN); - dma_sync_single_for_device(&pdev->dev, mapping, - bp->rx_copy_thresh, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copy_thresh, + bp->rx_dir); skb_put(skb, len); return skb; @@ -951,17 +1017,19 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, } prod_rx_buf->data = tpa_info->data; + prod_rx_buf->data_ptr = tpa_info->data_ptr; mapping = tpa_info->mapping; - dma_unmap_addr_set(prod_rx_buf, mapping, mapping); + prod_rx_buf->mapping = mapping; prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; prod_bd->rx_bd_haddr = cpu_to_le64(mapping); tpa_info->data = cons_rx_buf->data; + tpa_info->data_ptr = cons_rx_buf->data_ptr; cons_rx_buf->data = NULL; - tpa_info->mapping = dma_unmap_addr(cons_rx_buf, mapping); + tpa_info->mapping = cons_rx_buf->mapping; tpa_info->len = le32_to_cpu(tpa_start->rx_tpa_start_cmp_len_flags_type) >> @@ -1187,17 +1255,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, u32 *raw_cons, struct rx_tpa_end_cmp *tpa_end, struct rx_tpa_end_cmp_ext *tpa_end1, - bool *agg_event) + u8 *event) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u8 agg_id = TPA_END_AGG_ID(tpa_end); - u8 *data, agg_bufs; + u8 *data_ptr, agg_bufs; u16 cp_cons = RING_CMP(*raw_cons); unsigned int len; struct bnxt_tpa_info *tpa_info; dma_addr_t mapping; struct sk_buff *skb; + void *data; if (unlikely(bnapi->in_reset)) { int rc = bnxt_discard_rx(bp, bnapi, raw_cons, tpa_end); @@ -1209,7 +1278,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, tpa_info = &rxr->rx_tpa[agg_id]; data = tpa_info->data; - prefetch(data); + data_ptr = tpa_info->data_ptr; + prefetch(data_ptr); len = tpa_info->len; mapping = tpa_info->mapping; @@ -1220,7 +1290,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons)) return ERR_PTR(-EBUSY); - *agg_event = true; + *event |= BNXT_AGG_EVENT; cp_cons = NEXT_CMP(cp_cons); } @@ -1232,7 +1302,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, } if (len <= bp->rx_copy_thresh) { - skb = bnxt_copy_skb(bnapi, data, len, mapping); + skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs); return NULL; @@ -1248,18 +1318,19 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, } tpa_info->data = new_data; + tpa_info->data_ptr = new_data + bp->rx_offset; tpa_info->mapping = new_mapping; skb = build_skb(data, 0); dma_unmap_single(&bp->pdev->dev, mapping, bp->rx_buf_use_size, - PCI_DMA_FROMDEVICE); + bp->rx_dir); if (!skb) { kfree(data); bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs); return NULL; } - skb_reserve(skb, BNXT_RX_OFFSET); + skb_reserve(skb, bp->rx_offset); skb_put(skb, len); } @@ -1305,7 +1376,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, * -EIO - packet aborted due to hw error indicated in BD */ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, - bool *agg_event) + u8 *event) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; @@ -1316,10 +1387,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); struct bnxt_sw_rx_bd *rx_buf; unsigned int len; - u8 *data, agg_bufs, cmp_type; + u8 *data_ptr, agg_bufs, cmp_type; dma_addr_t dma_addr; struct sk_buff *skb; + void *data; int rc = 0; + u32 misc; rxcmp = (struct rx_cmp *) &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; @@ -1340,13 +1413,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, bnxt_tpa_start(bp, rxr, (struct rx_tpa_start_cmp *)rxcmp, (struct rx_tpa_start_cmp_ext *)rxcmp1); + *event |= BNXT_RX_EVENT; goto next_rx_no_prod; } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { skb = bnxt_tpa_end(bp, bnapi, &tmp_raw_cons, (struct rx_tpa_end_cmp *)rxcmp, - (struct rx_tpa_end_cmp_ext *)rxcmp1, - agg_event); + (struct rx_tpa_end_cmp_ext *)rxcmp1, event); if (unlikely(IS_ERR(skb))) return -EBUSY; @@ -1357,30 +1430,33 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, napi_gro_receive(&bnapi->napi, skb); rc = 1; } + *event |= BNXT_RX_EVENT; goto next_rx_no_prod; } cons = rxcmp->rx_cmp_opaque; rx_buf = &rxr->rx_buf_ring[cons]; data = rx_buf->data; + data_ptr = rx_buf->data_ptr; if (unlikely(cons != rxr->rx_next_cons)) { int rc1 = bnxt_discard_rx(bp, bnapi, raw_cons, rxcmp); bnxt_sched_reset(bp, rxr); return rc1; } - prefetch(data); + prefetch(data_ptr); - agg_bufs = (le32_to_cpu(rxcmp->rx_cmp_misc_v1) & RX_CMP_AGG_BUFS) >> - RX_CMP_AGG_BUFS_SHIFT; + misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1); + agg_bufs = (misc & RX_CMP_AGG_BUFS) >> RX_CMP_AGG_BUFS_SHIFT; if (agg_bufs) { if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, &tmp_raw_cons)) return -EBUSY; cp_cons = NEXT_CMP(cp_cons); - *agg_event = true; + *event |= BNXT_AGG_EVENT; } + *event |= BNXT_RX_EVENT; rx_buf->data = NULL; if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) { @@ -1393,17 +1469,29 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, } len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT; - dma_addr = dma_unmap_addr(rx_buf, mapping); + dma_addr = rx_buf->mapping; + + if (bnxt_rx_xdp(bp, rxr, cons, data, &data_ptr, &len, event)) { + rc = 1; + goto next_rx; + } if (len <= bp->rx_copy_thresh) { - skb = bnxt_copy_skb(bnapi, data, len, dma_addr); + skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { rc = -ENOMEM; goto next_rx; } } else { - skb = bnxt_rx_skb(bp, rxr, cons, prod, data, dma_addr, len); + u32 payload; + + if (rx_buf->data_ptr == data_ptr) + payload = misc & RX_CMP_PAYLOAD_OFFSET; + else + payload = 0; + skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr, + payload | len); if (!skb) { rc = -ENOMEM; goto next_rx; @@ -1627,8 +1715,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) u32 cons; int tx_pkts = 0; int rx_pkts = 0; - bool rx_event = false; - bool agg_event = false; + u8 event = 0; struct tx_cmp *txcmp; while (1) { @@ -1650,12 +1737,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (unlikely(tx_pkts > bp->tx_wake_thresh)) rx_pkts = budget; } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { - rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event); + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); if (likely(rc >= 0)) rx_pkts += rc; else if (rc == -EBUSY) /* partial completion */ break; - rx_event = true; } else if (unlikely((TX_CMP_TYPE(txcmp) == CMPL_BASE_TYPE_HWRM_DONE) || (TX_CMP_TYPE(txcmp) == @@ -1670,6 +1756,18 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) break; } + if (event & BNXT_TX_EVENT) { + struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + void __iomem *db = txr->tx_doorbell; + u16 prod = txr->tx_prod; + + /* Sync BD data before updating doorbell */ + wmb(); + + writel(DB_KEY_TX | prod, db); + writel(DB_KEY_TX | prod, db); + } + cpr->cp_raw_cons = raw_cons; /* ACK completion ring before freeing tx ring and producing new * buffers in rx/agg rings to prevent overflowing the completion @@ -1678,14 +1776,14 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); if (tx_pkts) - bnxt_tx_int(bp, bnapi, tx_pkts); + bnapi->tx_int(bp, bnapi, tx_pkts); - if (rx_event) { + if (event & BNXT_RX_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); - if (agg_event) { + if (event & BNXT_AGG_EVENT) { writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell); writel(DB_KEY_RX | rxr->rx_agg_prod, @@ -1706,7 +1804,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) u32 cp_cons, tmp_raw_cons; u32 raw_cons = cpr->cp_raw_cons; u32 rx_pkts = 0; - bool agg_event = false; + u8 event = 0; while (1) { int rc; @@ -1730,7 +1828,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) rxcmp1->rx_cmp_cfa_code_errors_v2 |= cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); - rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event); + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); if (likely(rc == -EIO)) rx_pkts++; else if (rc == -EBUSY) /* partial completion */ @@ -1753,7 +1851,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); - if (agg_event) { + if (event & BNXT_AGG_EVENT) { writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell); writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell); } @@ -1866,11 +1964,9 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) if (!data) continue; - dma_unmap_single( - &pdev->dev, - dma_unmap_addr(tpa_info, mapping), - bp->rx_buf_use_size, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, tpa_info->mapping, + bp->rx_buf_use_size, + bp->rx_dir); tpa_info->data = NULL; @@ -1880,19 +1976,20 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) for (j = 0; j < max_idx; j++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j]; - u8 *data = rx_buf->data; + void *data = rx_buf->data; if (!data) continue; - dma_unmap_single(&pdev->dev, - dma_unmap_addr(rx_buf, mapping), - bp->rx_buf_use_size, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, rx_buf->mapping, + bp->rx_buf_use_size, bp->rx_dir); rx_buf->data = NULL; - kfree(data); + if (BNXT_RX_PAGE_MODE(bp)) + __free_page(data); + else + kfree(data); } for (j = 0; j < max_agg_idx; j++) { @@ -1903,8 +2000,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) if (!page) continue; - dma_unmap_page(&pdev->dev, - dma_unmap_addr(rx_agg_buf, mapping), + dma_unmap_page(&pdev->dev, rx_agg_buf->mapping, BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE); rx_agg_buf->page = NULL; @@ -1995,6 +2091,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; + if (rxr->xdp_prog) + bpf_prog_put(rxr->xdp_prog); + kfree(rxr->rx_tpa); rxr->rx_tpa = NULL; @@ -2133,6 +2232,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) memset(txr->tx_push, 0, sizeof(struct tx_push_bd)); } ring->queue_id = bp->q_info[j].queue_id; + if (i < bp->tx_nr_rings_xdp) + continue; if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) j++; } @@ -2280,6 +2381,15 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) ring = &rxr->rx_ring_struct; bnxt_init_rxbd_pages(ring, type); + if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) { + rxr->xdp_prog = bpf_prog_add(bp->xdp_prog, 1); + if (IS_ERR(rxr->xdp_prog)) { + int rc = PTR_ERR(rxr->xdp_prog); + + rxr->xdp_prog = NULL; + return rc; + } + } prod = rxr->rx_prod; for (i = 0; i < bp->rx_ring_size; i++) { if (bnxt_alloc_rx_data(bp, rxr, prod, GFP_KERNEL) != 0) { @@ -2326,6 +2436,7 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return -ENOMEM; rxr->rx_tpa[i].data = data; + rxr->rx_tpa[i].data_ptr = data + bp->rx_offset; rxr->rx_tpa[i].mapping = mapping; } } else { @@ -2341,6 +2452,14 @@ static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; + if (BNXT_RX_PAGE_MODE(bp)) { + bp->rx_offset = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + bp->rx_dma_offset = XDP_PACKET_HEADROOM; + } else { + bp->rx_offset = BNXT_RX_OFFSET; + bp->rx_dma_offset = BNXT_RX_DMA_OFFSET; + } + for (i = 0; i < bp->rx_nr_rings; i++) { rc = bnxt_init_one_rx_ring(bp, i); if (rc) @@ -2464,7 +2583,7 @@ static int bnxt_calc_nr_ring_pages(u32 ring_size, int desc_per_pg) return pages; } -static void bnxt_set_tpa_flags(struct bnxt *bp) +void bnxt_set_tpa_flags(struct bnxt *bp) { bp->flags &= ~BNXT_FLAG_TPA; if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) @@ -2550,6 +2669,27 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->cp_ring_mask = bp->cp_bit - 1; } +int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) +{ + if (page_mode) { + if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) + return -EOPNOTSUPP; + bp->dev->max_mtu = BNXT_MAX_PAGE_MODE_MTU; + bp->flags &= ~BNXT_FLAG_AGG_RINGS; + bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE; + bp->dev->hw_features &= ~NETIF_F_LRO; + bp->dev->features &= ~NETIF_F_LRO; + bp->rx_dir = DMA_BIDIRECTIONAL; + bp->rx_skb_func = bnxt_rx_page_skb; + } else { + bp->dev->max_mtu = BNXT_MAX_MTU; + bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; + bp->rx_dir = DMA_FROM_DEVICE; + bp->rx_skb_func = bnxt_rx_skb; + } + return 0; +} + static void bnxt_free_vnic_attributes(struct bnxt *bp) { int i; @@ -2859,6 +2999,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_stats(bp); bnxt_free_ring_grps(bp); bnxt_free_vnics(bp); + kfree(bp->tx_ring_map); + bp->tx_ring_map = NULL; kfree(bp->tx_ring); bp->tx_ring = NULL; kfree(bp->rx_ring); @@ -2911,6 +3053,12 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) if (!bp->tx_ring) return -ENOMEM; + bp->tx_ring_map = kcalloc(bp->tx_nr_rings, sizeof(u16), + GFP_KERNEL); + + if (!bp->tx_ring_map) + return -ENOMEM; + if (bp->flags & BNXT_FLAG_SHARED_RINGS) j = 0; else @@ -2919,6 +3067,15 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) for (i = 0; i < bp->tx_nr_rings; i++, j++) { bp->tx_ring[i].bnapi = bp->bnapi[j]; bp->bnapi[j]->tx_ring = &bp->tx_ring[i]; + bp->tx_ring_map[i] = bp->tx_nr_rings_xdp + i; + if (i >= bp->tx_nr_rings_xdp) { + bp->tx_ring[i].txq_index = i - + bp->tx_nr_rings_xdp; + bp->bnapi[j]->tx_int = bnxt_tx_int; + } else { + bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP; + bp->bnapi[j]->tx_int = bnxt_tx_int_xdp; + } } rc = bnxt_alloc_stats(bp); @@ -4096,7 +4253,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings) return rc; } -int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings) +static int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings) { struct hwrm_func_cfg_input req = {0}; int rc; @@ -4841,7 +4998,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp) int rc; struct net_device *dev = bp->dev; - rc = netif_set_real_num_tx_queues(dev, bp->tx_nr_rings); + rc = netif_set_real_num_tx_queues(dev, bp->tx_nr_rings - + bp->tx_nr_rings_xdp); if (rc) return rc; @@ -4889,19 +5047,12 @@ static void bnxt_setup_msix(struct bnxt *bp) tcs = netdev_get_num_tc(dev); if (tcs > 1) { - bp->tx_nr_rings_per_tc = bp->tx_nr_rings / tcs; - if (bp->tx_nr_rings_per_tc == 0) { - netdev_reset_tc(dev); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; - } else { - int i, off, count; + int i, off, count; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs; - for (i = 0; i < tcs; i++) { - count = bp->tx_nr_rings_per_tc; - off = i * count; - netdev_set_tc_queue(dev, i, count, off); - } + for (i = 0; i < tcs; i++) { + count = bp->tx_nr_rings_per_tc; + off = i * count; + netdev_set_tc_queue(dev, i, count, off); } } @@ -6463,6 +6614,37 @@ static void bnxt_sp_task(struct work_struct *work) clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); } +/* Under rtnl_lock */ +int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp) +{ + int max_rx, max_tx, tx_sets = 1; + int tx_rings_needed; + bool sh = true; + int rc; + + if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) + sh = false; + + if (tcs) + tx_sets = tcs; + + rc = bnxt_get_max_rings(bp, &max_rx, &max_tx, sh); + if (rc) + return rc; + + if (max_rx < rx) + return -ENOMEM; + + tx_rings_needed = tx * tx_sets + tx_xdp; + if (max_tx < tx_rings_needed) + return -ENOMEM; + + if (bnxt_hwrm_reserve_tx_rings(bp, &tx_rings_needed) || + tx_rings_needed < (tx * tx_sets + tx_xdp)) + return -ENOMEM; + return 0; +} + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@ -6625,6 +6807,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; + int rc; if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", @@ -6638,19 +6821,10 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) if (bp->flags & BNXT_FLAG_SHARED_RINGS) sh = true; - if (tc) { - int max_rx_rings, max_tx_rings, req_tx_rings, rsv_tx_rings, rc; - - req_tx_rings = bp->tx_nr_rings_per_tc * tc; - rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); - if (rc || req_tx_rings > max_tx_rings) - return -ENOMEM; - - rsv_tx_rings = req_tx_rings; - if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings) || - rsv_tx_rings < req_tx_rings) - return -ENOMEM; - } + rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings, + tc, bp->tx_nr_rings_xdp); + if (rc) + return rc; /* Needs to close the device and do hw resource re-allocations */ if (netif_running(bp->dev)) @@ -6958,6 +7132,7 @@ static const struct net_device_ops bnxt_netdev_ops = { #endif .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, + .ndo_xdp = bnxt_xdp, }; static void bnxt_remove_one(struct pci_dev *pdev) @@ -6982,6 +7157,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) pci_iounmap(pdev, bp->bar0); kfree(bp->edev); bp->edev = NULL; + if (bp->xdp_prog) + bpf_prog_put(bp->xdp_prog); free_netdev(dev); pci_release_regions(pdev); @@ -7258,7 +7435,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* MTU range: 60 - 9500 */ dev->min_mtu = ETH_ZLEN; - dev->max_mtu = 9500; + dev->max_mtu = BNXT_MAX_MTU; bnxt_dcb_init(bp); @@ -7299,6 +7476,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_hwrm_func_qcfg(bp); bnxt_hwrm_port_led_qcaps(bp); + bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); bnxt_set_max_func_irqs(bp, max_irqs); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 52a1cc061ba3..9f07b9cf8965 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -416,6 +416,11 @@ struct rx_tpa_end_cmp_ext { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) +#define BNXT_MAX_MTU 9500 +#define BNXT_MAX_PAGE_MODE_MTU \ + ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ + XDP_PACKET_HEADROOM) + #define BNXT_MIN_PKT_SIZE 52 #define BNXT_NUM_TESTS(bp) 0 @@ -507,17 +512,25 @@ struct rx_tpa_end_cmp_ext { #define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \ BNXT_HWRM_REQ_MAX_SIZE) +#define BNXT_RX_EVENT 1 +#define BNXT_AGG_EVENT 2 +#define BNXT_TX_EVENT 4 + struct bnxt_sw_tx_bd { struct sk_buff *skb; DEFINE_DMA_UNMAP_ADDR(mapping); u8 is_gso; u8 is_push; - unsigned short nr_frags; + union { + unsigned short nr_frags; + u16 rx_prod; + }; }; struct bnxt_sw_rx_bd { - u8 *data; - DEFINE_DMA_UNMAP_ADDR(mapping); + void *data; + u8 *data_ptr; + dma_addr_t mapping; }; struct bnxt_sw_rx_agg_bd { @@ -558,6 +571,7 @@ struct bnxt_tx_ring_info { struct bnxt_napi *bnapi; u16 tx_prod; u16 tx_cons; + u16 txq_index; void __iomem *tx_doorbell; struct tx_bd *tx_desc_ring[MAX_TX_PAGES]; @@ -576,7 +590,8 @@ struct bnxt_tx_ring_info { }; struct bnxt_tpa_info { - u8 *data; + void *data; + u8 *data_ptr; dma_addr_t mapping; u16 len; unsigned short gso_type; @@ -608,6 +623,8 @@ struct bnxt_rx_ring_info { void __iomem *rx_doorbell; void __iomem *rx_agg_doorbell; + struct bpf_prog *xdp_prog; + struct rx_bd *rx_desc_ring[MAX_RX_PAGES]; struct bnxt_sw_rx_bd *rx_buf_ring; @@ -654,6 +671,11 @@ struct bnxt_napi { struct bnxt_rx_ring_info *rx_ring; struct bnxt_tx_ring_info *tx_ring; + void (*tx_int)(struct bnxt *, struct bnxt_napi *, + int); + u32 flags; +#define BNXT_NAPI_FLAG_XDP 0x1 + bool in_reset; }; @@ -965,6 +987,7 @@ struct bnxt { #define BNXT_FLAG_ROCE_CAP (BNXT_FLAG_ROCEV1_CAP | \ BNXT_FLAG_ROCEV2_CAP) #define BNXT_FLAG_NO_AGG_RINGS 0x20000 + #define BNXT_FLAG_RX_PAGE_MODE 0x40000 #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ @@ -976,6 +999,7 @@ struct bnxt { #define BNXT_NPAR(bp) ((bp)->port_partition_type) #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp)) #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) +#define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) struct bnxt_en_dev *edev; struct bnxt_en_dev * (*ulp_probe)(struct net_device *); @@ -984,12 +1008,21 @@ struct bnxt { struct bnxt_rx_ring_info *rx_ring; struct bnxt_tx_ring_info *tx_ring; + u16 *tx_ring_map; struct sk_buff * (*gro_func)(struct bnxt_tpa_info *, int, int, struct sk_buff *); + struct sk_buff * (*rx_skb_func)(struct bnxt *, + struct bnxt_rx_ring_info *, + u16, void *, u8 *, dma_addr_t, + unsigned int); + u32 rx_buf_size; u32 rx_buf_use_size; /* useable size */ + u16 rx_offset; + u16 rx_dma_offset; + enum dma_data_direction rx_dir; u32 rx_ring_size; u32 rx_agg_ring_size; u32 rx_copy_thresh; @@ -1005,6 +1038,7 @@ struct bnxt { int tx_nr_pages; int tx_nr_rings; int tx_nr_rings_per_tc; + int tx_nr_rings_xdp; int tx_wake_thresh; int tx_push_thresh; @@ -1140,6 +1174,8 @@ struct bnxt { u8 num_leds; struct bnxt_led_info leds[BNXT_MAX_LED]; + + struct bpf_prog *xdp_prog; }; #define BNXT_RX_STATS_OFFSET(counter) \ @@ -1159,7 +1195,23 @@ struct bnxt { #define SFF_MODULE_ID_QSFP28 0x11 #define BNXT_MAX_PHY_I2C_RESP_SIZE 64 +static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr) +{ + /* Tell compiler to fetch tx indices from memory. */ + barrier(); + + return bp->tx_ring_size - + ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask); +} + +extern const u16 bnxt_lhint_arr[]; + +int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + u16 prod, gfp_t gfp); +void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data); +void bnxt_set_tpa_flags(struct bnxt *bp); void bnxt_set_ring_params(struct bnxt *); +int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16); int _hwrm_send_message(struct bnxt *, void *, u32, int); int hwrm_send_message(struct bnxt *, void *, u32, int); @@ -1168,7 +1220,6 @@ int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap, int bmap_size); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); -int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings); int bnxt_hwrm_set_coal(struct bnxt *); unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max); @@ -1182,6 +1233,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_close_nic(struct bnxt *, bool, bool); +int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); void bnxt_restore_pf_fw_resources(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 24818e1e59f3..7aa248db10c6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -387,10 +387,10 @@ static int bnxt_set_channels(struct net_device *dev, struct ethtool_channels *channel) { struct bnxt *bp = netdev_priv(dev); - int max_rx_rings, max_tx_rings, tcs; - int req_tx_rings, rsv_tx_rings; - u32 rc = 0; + int req_tx_rings, req_rx_rings, tcs; bool sh = false; + int tx_xdp = 0; + int rc = 0; if (channel->other_count) return -EINVAL; @@ -410,32 +410,21 @@ static int bnxt_set_channels(struct net_device *dev, if (channel->combined_count) sh = true; - bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); - tcs = netdev_get_num_tc(dev); - if (tcs > 1) - max_tx_rings /= tcs; - - if (sh && - channel->combined_count > max_t(int, max_rx_rings, max_tx_rings)) - return -ENOMEM; - - if (!sh && (channel->rx_count > max_rx_rings || - channel->tx_count > max_tx_rings)) - return -ENOMEM; req_tx_rings = sh ? channel->combined_count : channel->tx_count; - req_tx_rings = min_t(int, req_tx_rings, max_tx_rings); - if (tcs > 1) - req_tx_rings *= tcs; - - rsv_tx_rings = req_tx_rings; - if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings)) - return -ENOMEM; - - if (rsv_tx_rings < req_tx_rings) { - netdev_warn(dev, "Unable to allocate the requested tx rings\n"); - return -ENOMEM; + req_rx_rings = sh ? channel->combined_count : channel->rx_count; + if (bp->tx_nr_rings_xdp) { + if (!sh) { + netdev_err(dev, "Only combined mode supported when XDP is enabled.\n"); + return -EINVAL; + } + tx_xdp = req_rx_rings; + } + rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, tcs, tx_xdp); + if (rc) { + netdev_warn(dev, "Unable to allocate the requested rings\n"); + return rc; } if (netif_running(dev)) { @@ -454,19 +443,17 @@ static int bnxt_set_channels(struct net_device *dev, if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->rx_nr_rings = min_t(int, channel->combined_count, - max_rx_rings); - bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count, - max_tx_rings); + bp->rx_nr_rings = channel->combined_count; + bp->tx_nr_rings_per_tc = channel->combined_count; } else { bp->flags &= ~BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->rx_count; bp->tx_nr_rings_per_tc = channel->tx_count; } - - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings_xdp = tx_xdp; + bp->tx_nr_rings = bp->tx_nr_rings_per_tc + tx_xdp; if (tcs > 1) - bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs; + bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : bp->tx_nr_rings + bp->rx_nr_rings; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c new file mode 100644 index 000000000000..899c30fb5188 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -0,0 +1,240 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2016-2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> +#include "bnxt_hsi.h" +#include "bnxt.h" +#include "bnxt_xdp.h" + +static void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len, u16 rx_prod) +{ + struct bnxt_sw_tx_bd *tx_buf; + struct tx_bd_ext *txbd1; + struct tx_bd *txbd; + u32 flags; + u16 prod; + + prod = txr->tx_prod; + tx_buf = &txr->tx_buf_ring[prod]; + tx_buf->rx_prod = rx_prod; + + txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD | + (2 << TX_BD_FLAGS_BD_CNT_SHIFT) | TX_BD_FLAGS_COAL_NOW | + TX_BD_FLAGS_PACKET_END | bnxt_lhint_arr[len >> 9]; + txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + txbd->tx_bd_opaque = prod; + txbd->tx_bd_haddr = cpu_to_le64(mapping); + + prod = NEXT_TX(prod); + txbd1 = (struct tx_bd_ext *) + &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + + txbd1->tx_bd_hsize_lflags = cpu_to_le32(0); + txbd1->tx_bd_mss = cpu_to_le32(0); + txbd1->tx_bd_cfa_action = cpu_to_le32(0); + txbd1->tx_bd_cfa_meta = cpu_to_le32(0); + + prod = NEXT_TX(prod); + txr->tx_prod = prod; +} + +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) +{ + struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + struct bnxt_sw_tx_bd *tx_buf; + u16 tx_cons = txr->tx_cons; + u16 last_tx_cons = tx_cons; + u16 rx_prod; + int i; + + for (i = 0; i < nr_pkts; i++) { + last_tx_cons = tx_cons; + tx_cons = NEXT_TX(tx_cons); + tx_cons = NEXT_TX(tx_cons); + } + txr->tx_cons = tx_cons; + if (bnxt_tx_avail(bp, txr) == bp->tx_ring_size) { + rx_prod = rxr->rx_prod; + } else { + tx_buf = &txr->tx_buf_ring[last_tx_cons]; + rx_prod = tx_buf->rx_prod; + } + writel(DB_KEY_RX | rx_prod, rxr->rx_doorbell); +} + +/* returns the following: + * true - packet consumed by XDP and new buffer is allocated. + * false - packet should be passed to the stack. + */ +bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, + struct page *page, u8 **data_ptr, unsigned int *len, u8 *event) +{ + struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); + struct bnxt_tx_ring_info *txr; + struct bnxt_sw_rx_bd *rx_buf; + struct pci_dev *pdev; + struct xdp_buff xdp; + dma_addr_t mapping; + void *orig_data; + u32 tx_avail; + u32 offset; + u32 act; + + if (!xdp_prog) + return false; + + pdev = bp->pdev; + txr = rxr->bnapi->tx_ring; + rx_buf = &rxr->rx_buf_ring[cons]; + offset = bp->rx_offset; + + xdp.data_hard_start = *data_ptr - offset; + xdp.data = *data_ptr; + xdp.data_end = *data_ptr + *len; + orig_data = xdp.data; + mapping = rx_buf->mapping - bp->rx_dma_offset; + + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + + rcu_read_lock(); + act = bpf_prog_run_xdp(xdp_prog, &xdp); + rcu_read_unlock(); + + tx_avail = bnxt_tx_avail(bp, txr); + /* If the tx ring is not full, we must not update the rx producer yet + * because we may still be transmitting on some BDs. + */ + if (tx_avail != bp->tx_ring_size) + *event &= ~BNXT_RX_EVENT; + + if (orig_data != xdp.data) { + offset = xdp.data - xdp.data_hard_start; + *data_ptr = xdp.data_hard_start + offset; + *len = xdp.data_end - xdp.data; + } + switch (act) { + case XDP_PASS: + return false; + + case XDP_TX: + if (tx_avail < 2) { + trace_xdp_exception(bp->dev, xdp_prog, act); + bnxt_reuse_rx_data(rxr, cons, page); + return true; + } + + *event = BNXT_TX_EVENT; + dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, + bp->rx_dir); + bnxt_xmit_xdp(bp, txr, mapping + offset, *len, + NEXT_RX(rxr->rx_prod)); + bnxt_reuse_rx_data(rxr, cons, page); + return true; + default: + bpf_warn_invalid_xdp_action(act); + /* Fall thru */ + case XDP_ABORTED: + trace_xdp_exception(bp->dev, xdp_prog, act); + /* Fall thru */ + case XDP_DROP: + bnxt_reuse_rx_data(rxr, cons, page); + break; + } + return true; +} + +/* Under rtnl_lock */ +static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) +{ + struct net_device *dev = bp->dev; + int tx_xdp = 0, rc, tc; + struct bpf_prog *old; + + if (prog && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { + netdev_warn(dev, "MTU %d larger than largest XDP supported MTU %d.\n", + bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU); + return -EOPNOTSUPP; + } + if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) { + netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); + return -EOPNOTSUPP; + } + if (prog) + tx_xdp = bp->rx_nr_rings; + + tc = netdev_get_num_tc(dev); + if (!tc) + tc = 1; + rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings, + tc, tx_xdp); + if (rc) { + netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n"); + return rc; + } + if (netif_running(dev)) + bnxt_close_nic(bp, true, false); + + old = xchg(&bp->xdp_prog, prog); + if (old) + bpf_prog_put(old); + + if (prog) { + bnxt_set_rx_skb_mode(bp, true); + } else { + int rx, tx; + + bnxt_set_rx_skb_mode(bp, false); + bnxt_get_max_rings(bp, &rx, &tx, true); + if (rx > 1) { + bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS; + bp->dev->hw_features |= NETIF_F_LRO; + } + } + bp->tx_nr_rings_xdp = tx_xdp; + bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; + bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + bp->num_stat_ctxs = bp->cp_nr_rings; + bnxt_set_tpa_flags(bp); + bnxt_set_ring_params(bp); + + if (netif_running(dev)) + return bnxt_open_nic(bp, true, false); + + return 0; +} + +int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + struct bnxt *bp = netdev_priv(dev); + int rc; + + switch (xdp->command) { + case XDP_SETUP_PROG: + rc = bnxt_xdp_set(bp, xdp->prog); + break; + case XDP_QUERY_PROG: + xdp->prog_attached = !!bp->xdp_prog; + rc = 0; + break; + default: + rc = -EINVAL; + break; + } + return rc; +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h new file mode 100644 index 000000000000..b529f2c5355b --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -0,0 +1,19 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2016-2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_XDP_H +#define BNXT_XDP_H + +void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); +bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, + struct page *page, u8 **data_ptr, unsigned int *len, + u8 *event); +int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp); + +#endif diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index b1d2ac818710..cec94bbb2ea5 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -3665,7 +3665,7 @@ static int cnic_cm_destroy(struct cnic_sock *csk) static inline u16 cnic_get_vlan(struct net_device *dev, struct net_device **vlan_dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { *vlan_dev = vlan_dev_real_dev(dev); return vlan_dev_vlan_id(dev); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c index 5f226eda8cd6..52063587e1e9 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c @@ -351,7 +351,7 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, e->smt_idx = smt_idx; atomic_set(&e->refcnt, 1); neigh_replace(e, neigh); - if (neigh->dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(neigh->dev)) e->vlan = vlan_dev_vlan_id(neigh->dev); else e->vlan = VLAN_NONE; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f4f569060689..70590144be03 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1805,7 +1805,7 @@ static void check_neigh_update(struct neighbour *neigh) const struct device *parent; const struct net_device *netdev = neigh->dev; - if (netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(netdev)) netdev = vlan_dev_real_dev(netdev); parent = netdev->dev.parent; if (parent && parent->driver == &cxgb4_driver.driver) @@ -2111,7 +2111,7 @@ static int cxgb4_inet6addr_handler(struct notifier_block *this, #if IS_ENABLED(CONFIG_BONDING) struct adapter *adap; #endif - if (event_dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(event_dev)) event_dev = vlan_dev_real_dev(event_dev); #if IS_ENABLED(CONFIG_BONDING) if (event_dev->flags & IFF_MASTER) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 60a26037a1c6..7c8c5b9a3c22 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -432,7 +432,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, else lport = netdev2pinfo(physdev)->lport; - if (neigh->dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(neigh->dev)) vlan = vlan_dev_vlan_id(neigh->dev); else vlan = VLAN_NONE; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index de6c47744b8e..0f4d1697be46 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3927,6 +3927,25 @@ static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, return 0; } +static void mvneta_ethtool_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + wol->supported = 0; + wol->wolopts = 0; + + if (dev->phydev) + phy_ethtool_get_wol(dev->phydev, wol); +} + +static int mvneta_ethtool_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + if (!dev->phydev) + return -EOPNOTSUPP; + + return phy_ethtool_set_wol(dev->phydev, wol); +} + static const struct net_device_ops mvneta_netdev_ops = { .ndo_open = mvneta_open, .ndo_stop = mvneta_stop, @@ -3956,6 +3975,8 @@ const struct ethtool_ops mvneta_eth_tool_ops = { .set_rxfh = mvneta_ethtool_set_rxfh, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = mvneta_ethtool_set_link_ksettings, + .get_wol = mvneta_ethtool_get_wol, + .set_wol = mvneta_ethtool_set_wol, }; /* Initialize hw */ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 5053c949148f..4e36e287d605 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1395,7 +1395,7 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, gen_context); if (gen_context->flags & - (MLX4_FLAG_V_PPRX_MASK || MLX4_FLAG_V_PPTX_MASK)) + (MLX4_FLAG_V_PPRX_MASK | MLX4_FLAG_V_PPTX_MASK)) mlx4_en_set_port_global_pause(dev, slave, gen_context); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3cf5f484153..d87a82682cb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -731,8 +731,8 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, int ret; dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); - if (dst->error) { - ret = dst->error; + ret = dst->error; + if (ret) { dst_release(dst); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 76a7574c3c7d..ef23eaedc2ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -73,6 +73,7 @@ config MLXSW_SWITCHX2 config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q + depends on PSAMPLE || PSAMPLE=n select PARMAN default m ---help--- diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 57a98849551b..a4c07841aaf6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1901,11 +1901,11 @@ int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) } EXPORT_SYMBOL(mlxsw_core_schedule_dw); -int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay) +bool mlxsw_core_schedule_work(struct work_struct *work) { - return queue_delayed_work(mlxsw_owq, dwork, delay); + return queue_work(mlxsw_owq, work); } -EXPORT_SYMBOL(mlxsw_core_schedule_odw); +EXPORT_SYMBOL(mlxsw_core_schedule_work); void mlxsw_core_flush_owq(void) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index a7f94fbc898b..cf38cf9027f8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -207,7 +207,7 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, u8 local_port); int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); -int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay); +bool mlxsw_core_schedule_work(struct work_struct *work); void mlxsw_core_flush_owq(void); #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index b50a312d89c1..0899e2d310e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3653,7 +3653,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) * Configures the properties for forwarding to CPU. */ #define MLXSW_REG_HTGT_ID 0x7002 -#define MLXSW_REG_HTGT_LEN 0x100 +#define MLXSW_REG_HTGT_LEN 0x20 MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8a52c860794b..c2a675125801 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1400,8 +1400,6 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, - .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, - .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4d251e06bf31..dcd641aff785 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -599,10 +599,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_neigh_construct(struct net_device *dev, - struct neighbour *n); -void mlxsw_sp_router_neigh_destroy(struct net_device *dev, - struct neighbour *n); int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9e494a446b7e..b19f69f4e28e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -613,9 +613,7 @@ struct mlxsw_sp_neigh_entry { struct rhash_head ht_node; struct mlxsw_sp_neigh_key key; u16 rif; - bool offloaded; - struct delayed_work dw; - struct mlxsw_sp_port *mlxsw_sp_port; + bool connected; unsigned char ha[ETH_ALEN]; struct list_head nexthop_list; /* list of nexthops using * this neigh entry @@ -629,105 +627,88 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { .key_len = sizeof(struct mlxsw_sp_neigh_key), }; -static int -mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_neigh_entry *neigh_entry) -{ - return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, - &neigh_entry->ht_node, - mlxsw_sp_neigh_ht_params); -} - -static void -mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_neigh_entry *neigh_entry) -{ - rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, - &neigh_entry->ht_node, - mlxsw_sp_neigh_ht_params); -} - -static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); - static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif) +mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, + u16 rif) { struct mlxsw_sp_neigh_entry *neigh_entry; - neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL); if (!neigh_entry) return NULL; + neigh_entry->key.n = n; neigh_entry->rif = rif; - INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; } -static void -mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry) { kfree(neigh_entry); } -static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { - struct mlxsw_sp_neigh_key key; + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} - key.n = n; - return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, - &key, mlxsw_sp_neigh_ht_params); +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); } -int mlxsw_sp_router_neigh_construct(struct net_device *dev, - struct neighbour *n) +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_rif *r; int err; - if (n->tbl != &arp_tbl) - return 0; - - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (neigh_entry) - return 0; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); - if (WARN_ON(!r)) - return -EINVAL; + if (!r) + return ERR_PTR(-EINVAL); - neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif); + neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif); if (!neigh_entry) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); if (err) goto err_neigh_entry_insert; - return 0; + + return neigh_entry; err_neigh_entry_insert: - mlxsw_sp_neigh_entry_destroy(neigh_entry); - return err; + mlxsw_sp_neigh_entry_free(neigh_entry); + return ERR_PTR(err); } -void mlxsw_sp_router_neigh_destroy(struct net_device *dev, - struct neighbour *n) +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_neigh_entry *neigh_entry; + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_free(neigh_entry); +} - if (n->tbl != &arp_tbl) - return; +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +{ + struct mlxsw_sp_neigh_key key; - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (!neigh_entry) - return; - mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); - mlxsw_sp_neigh_entry_destroy(neigh_entry); + key.n = n; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); } static void @@ -866,13 +847,11 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) /* Take RTNL mutex here to prevent lists from changes */ rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, - nexthop_neighs_list_node) { + nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. */ - if (!list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->key.n, NULL); - } + neigh_event_send(neigh_entry->key.n, NULL); rtnl_unlock(); } @@ -916,11 +895,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) */ rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, - nexthop_neighs_list_node) { - if (!(neigh_entry->key.n->nud_state & NUD_VALID) && - !list_empty(&neigh_entry->nexthop_list)) + nexthop_neighs_list_node) + if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); - } rtnl_unlock(); mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, @@ -932,79 +909,101 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool removing); -static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) +{ + return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD : + MLXSW_REG_RAUHT_OP_WRITE_DELETE; +} + +static void +mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) { - struct mlxsw_sp_neigh_entry *neigh_entry = - container_of(work, struct mlxsw_sp_neigh_entry, dw.work); struct neighbour *n = neigh_entry->key.n; - struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 dip = ntohl(*((__be32 *) n->primary_key)); char rauht_pl[MLXSW_REG_RAUHT_LEN]; - struct net_device *dev; + + mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static void +mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (!adding && !neigh_entry->connected) + return; + neigh_entry->connected = adding; + if (neigh_entry->key.n->tbl == &arp_tbl) + mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + else + WARN_ON_ONCE(1); +} + +struct mlxsw_sp_neigh_event_work { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + struct neighbour *n; +}; + +static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) +{ + struct mlxsw_sp_neigh_event_work *neigh_work = + container_of(work, struct mlxsw_sp_neigh_event_work, work); + struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct neighbour *n = neigh_work->n; + unsigned char ha[ETH_ALEN]; bool entry_connected; u8 nud_state, dead; - bool updating; - bool removing; - bool adding; - u32 dip; - int err; + /* If these parameters are changed after we release the lock, + * then we are guaranteed to receive another event letting us + * know about it. + */ read_lock_bh(&n->lock); - dip = ntohl(*((__be32 *) n->primary_key)); - memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + memcpy(ha, n->ha, ETH_ALEN); nud_state = n->nud_state; dead = n->dead; - dev = n->dev; read_unlock_bh(&n->lock); + rtnl_lock(); entry_connected = nud_state & NUD_VALID && !dead; - adding = (!neigh_entry->offloaded) && entry_connected; - updating = neigh_entry->offloaded && entry_connected; - removing = neigh_entry->offloaded && !entry_connected; - - if (adding || updating) { - mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, - neigh_entry->rif, - neigh_entry->ha, dip); - err = mlxsw_reg_write(mlxsw_sp->core, - MLXSW_REG(rauht), rauht_pl); - if (err) { - netdev_err(dev, "Could not add neigh %pI4h\n", &dip); - neigh_entry->offloaded = false; - } else { - neigh_entry->offloaded = true; - } - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); - } else if (removing) { - mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, - neigh_entry->rif, - neigh_entry->ha, dip); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), - rauht_pl); - if (err) { - netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); - neigh_entry->offloaded = true; - } else { - neigh_entry->offloaded = false; - } - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!entry_connected && !neigh_entry) + goto out; + if (!neigh_entry) { + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) + goto out; } + memcpy(neigh_entry->ha, ha, ETH_ALEN); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + +out: + rtnl_unlock(); neigh_release(n); - mlxsw_sp_port_dev_put(mlxsw_sp_port); + kfree(neigh_work); } int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_neigh_event_work *neigh_work; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp; unsigned long interval; - struct net_device *dev; struct neigh_parms *p; struct neighbour *n; - u32 dip; switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -1029,33 +1028,31 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, break; case NETEVENT_NEIGH_UPDATE: n = ptr; - dev = n->dev; if (n->tbl != &arp_tbl) return NOTIFY_DONE; - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); if (!mlxsw_sp_port) return NOTIFY_DONE; - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (WARN_ON(!neigh_entry)) { + neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); + if (!neigh_work) { mlxsw_sp_port_dev_put(mlxsw_sp_port); - return NOTIFY_DONE; + return NOTIFY_BAD; } - neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); + neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + neigh_work->n = n; /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { - neigh_release(n); - mlxsw_sp_port_dev_put(mlxsw_sp_port); - } + mlxsw_core_schedule_work(&neigh_work->work); + mlxsw_sp_port_dev_put(mlxsw_sp_port); break; } @@ -1336,14 +1333,11 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop *nh; - /* Take RTNL mutex here to prevent lists from changes */ - rtnl_lock(); list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } - rtnl_unlock(); } static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, @@ -1359,7 +1353,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, /* Take a reference of neigh here ensuring that neigh would * not be detructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or - * in neith_create() in case n is not found. + * in neigh_create() in case n is not found. */ n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); if (!n) { @@ -1370,8 +1364,11 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, } neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!neigh_entry) { - neigh_release(n); - return -EINVAL; + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) { + neigh_release(n); + return -EINVAL; + } } /* If that is the first nexthop connected to that neigh, add to @@ -1397,6 +1394,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct neighbour *n = neigh_entry->key.n; __mlxsw_sp_nexthop_neigh_update(nh, true); list_del(&nh->neigh_list_node); @@ -1407,7 +1405,10 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, if (list_empty(&nh->neigh_entry->nexthop_list)) list_del(&nh->neigh_entry->nexthop_neighs_list_node); - neigh_release(neigh_entry->key.n); + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + + neigh_release(n); } static struct mlxsw_sp_nexthop_group * @@ -1964,7 +1965,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) } struct mlxsw_sp_fib_event_work { - struct delayed_work dw; + struct work_struct work; struct fib_entry_notifier_info fen_info; struct mlxsw_sp *mlxsw_sp; unsigned long event; @@ -1973,7 +1974,7 @@ struct mlxsw_sp_fib_event_work { static void mlxsw_sp_router_fib_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, dw.work); + container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; @@ -2014,7 +2015,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, if (WARN_ON(!fib_work)) return NOTIFY_BAD; - INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work); + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); fib_work->mlxsw_sp = mlxsw_sp; fib_work->event = event; @@ -2029,7 +2030,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; } - mlxsw_core_schedule_odw(&fib_work->dw, 0); + mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 0cf8a3703275..3b5d7cfa2321 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3264,7 +3264,7 @@ netxen_list_config_ip(struct netxen_adapter *adapter, cur = kzalloc(sizeof(struct nx_ip_list), GFP_ATOMIC); if (cur == NULL) goto out; - if (dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); cur->master = !!netif_is_bond_master(dev); cur->ip_addr = ifa->ifa_address; @@ -3374,7 +3374,7 @@ static void netxen_config_master(struct net_device *dev, unsigned long event) !netif_is_bond_slave(dev)) { netxen_config_indev_addr(adapter, master, event); for_each_netdev_rcu(&init_net, slave) - if (slave->priv_flags & IFF_802_1Q_VLAN && + if (is_vlan_dev(slave) && vlan_dev_real_dev(slave) == master) netxen_config_indev_addr(adapter, slave, event); } @@ -3400,7 +3400,7 @@ recheck: if (dev == NULL) goto done; - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { dev = vlan_dev_real_dev(dev); goto recheck; } @@ -3445,7 +3445,7 @@ recheck: if (dev == NULL) goto done; - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { dev = vlan_dev_real_dev(dev); goto recheck; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 4c0cce962585..b6628aaa6e4a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4220,7 +4220,7 @@ recheck: if (dev == NULL) goto done; - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { dev = vlan_dev_real_dev(dev); goto recheck; } @@ -4256,7 +4256,7 @@ recheck: if (dev == NULL) goto done; - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { dev = vlan_dev_real_dev(dev); goto recheck; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c index cfc57d2c64f9..c418a6e9a591 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c @@ -148,16 +148,26 @@ static void emac_get_ringparam(struct net_device *netdev, static void emac_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { - struct phy_device *phydev = netdev->phydev; + struct emac_adapter *adpt = netdev_priv(netdev); - if (phydev) { - if (phydev->autoneg) - pause->autoneg = 1; - if (phydev->pause) - pause->rx_pause = 1; - if (phydev->pause != phydev->asym_pause) - pause->tx_pause = 1; - } + pause->autoneg = adpt->automatic ? AUTONEG_ENABLE : AUTONEG_DISABLE; + pause->rx_pause = adpt->rx_flow_control ? 1 : 0; + pause->tx_pause = adpt->tx_flow_control ? 1 : 0;; +} + +static int emac_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + adpt->automatic = pause->autoneg == AUTONEG_ENABLE; + adpt->rx_flow_control = pause->rx_pause != 0; + adpt->tx_flow_control = pause->tx_pause != 0; + + if (netif_running(netdev)) + return emac_reinit_locked(adpt); + + return 0; } static const struct ethtool_ops emac_ethtool_ops = { @@ -172,7 +182,9 @@ static const struct ethtool_ops emac_ethtool_ops = { .get_ethtool_stats = emac_get_ethtool_stats, .get_ringparam = emac_get_ringparam, + .get_pauseparam = emac_get_pauseparam, + .set_pauseparam = emac_set_pauseparam, .nway_reset = emac_nway_reset, diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index b991219862b1..4b3e0144ad5e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -565,11 +565,19 @@ static void emac_mac_start(struct emac_adapter *adpt) mac |= TXEN | RXEN; /* enable RX/TX */ - /* Configure MAC flow control to match the PHY's settings. */ - if (phydev->pause) - mac |= RXFC; - if (phydev->pause != phydev->asym_pause) - mac |= TXFC; + /* Configure MAC flow control. If set to automatic, then match + * whatever the PHY does. Otherwise, enable or disable it, depending + * on what the user configured via ethtool. + */ + mac &= ~(RXFC | TXFC); + + if (adpt->automatic) { + /* If it's set to automatic, then update our local values */ + adpt->rx_flow_control = phydev->pause; + adpt->tx_flow_control = phydev->pause != phydev->asym_pause; + } + mac |= adpt->rx_flow_control ? RXFC : 0; + mac |= adpt->tx_flow_control ? TXFC : 0; /* setup link speed */ mac &= ~SPEED_MASK; diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 3387c0a88746..28a8cdc36485 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -436,6 +436,10 @@ static void emac_init_adapter(struct emac_adapter *adpt) { u32 reg; + adpt->rrd_size = EMAC_RRD_SIZE; + adpt->tpd_size = EMAC_TPD_SIZE; + adpt->rfd_size = EMAC_RFD_SIZE; + /* descriptors */ adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS; adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS; @@ -456,6 +460,9 @@ static void emac_init_adapter(struct emac_adapter *adpt) /* others */ adpt->preamble = EMAC_PREAMBLE_DEF; + + /* default to automatic flow control */ + adpt->automatic = true; } /* Get the clock */ @@ -675,10 +682,6 @@ static int emac_probe(struct platform_device *pdev) netdev->watchdog_timeo = EMAC_WATCHDOG_TIME; netdev->irq = adpt->irq.irq; - adpt->rrd_size = EMAC_RRD_SIZE; - adpt->tpd_size = EMAC_TPD_SIZE; - adpt->rfd_size = EMAC_RFD_SIZE; - netdev->netdev_ops = &emac_netdev_ops; emac_init_adapter(adpt); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h index ef91dcc7f646..e77fb6966cbb 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.h +++ b/drivers/net/ethernet/qualcomm/emac/emac.h @@ -306,6 +306,13 @@ struct emac_adapter { unsigned int rxbuf_size; + /* Flow control / pause frames support. If automatic=True, do whatever + * the PHY does. Otherwise, use tx_flow_control and rx_flow_control. + */ + bool automatic; + bool tx_flow_control; + bool rx_flow_control; + /* Ring parameter */ u8 tpd_burst; u8 rfd_burst; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index fcd4eeecfef4..c28cd9daf949 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -328,8 +328,8 @@ static int efx_poll(struct napi_struct *napi, int budget) * since efx_nic_eventq_read_ack() will have no effect if * interrupts have already been disabled. */ - napi_complete_done(napi, spent); - efx_nic_eventq_read_ack(channel); + if (napi_complete_done(napi, spent)) + efx_nic_eventq_read_ack(channel); } return spent; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 35a95dcc755b..4d1c0c3042c7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1601,8 +1601,6 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - netif_trans_update(ndev); - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 72b0c1f7496e..425285f36595 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1605,7 +1605,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; /* Avoid Vlan dev with same MAC registering as VF */ - if (event_dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(event_dev)) return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 356859ac7c18..407b0b601ea8 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,6 +1,7 @@ # Makefile for Linux PHY drivers and MDIO bus drivers -libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o +libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o \ + mdio-boardinfo.o libphy-$(CONFIG_SWPHY) += swphy.o libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index aa01020ab1b9..d1c2614dad3a 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -453,6 +453,7 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7278, "Broadcom BCM7278"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"), + BCM7XXX_28NM_GPHY(PHY_ID_BCM74371, "Broadcom BCM74371"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), @@ -472,6 +473,7 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7362, 0xfffffff0, }, { PHY_ID_BCM7425, 0xfffffff0, }, { PHY_ID_BCM7429, 0xfffffff0, }, + { PHY_ID_BCM74371, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7435, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c new file mode 100644 index 000000000000..6b988f77da08 --- /dev/null +++ b/drivers/net/phy/mdio-boardinfo.c @@ -0,0 +1,86 @@ +/* + * mdio-boardinfo - Collect pre-declarations for MDIO devices + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/mutex.h> +#include <linux/list.h> + +#include "mdio-boardinfo.h" + +static LIST_HEAD(mdio_board_list); +static DEFINE_MUTEX(mdio_board_lock); + +/** + * mdiobus_setup_mdiodev_from_board_info - create and setup MDIO devices + * from pre-collected board specific MDIO information + * @mdiodev: MDIO device pointer + * Context: can sleep + */ +void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus) +{ + struct mdio_board_entry *be; + struct mdio_device *mdiodev; + struct mdio_board_info *bi; + int ret; + + mutex_lock(&mdio_board_lock); + list_for_each_entry(be, &mdio_board_list, list) { + bi = &be->board_info; + + if (strcmp(bus->id, bi->bus_id)) + continue; + + mdiodev = mdio_device_create(bus, bi->mdio_addr); + if (IS_ERR(mdiodev)) + continue; + + strncpy(mdiodev->modalias, bi->modalias, + sizeof(mdiodev->modalias)); + mdiodev->bus_match = mdio_device_bus_match; + mdiodev->dev.platform_data = (void *)bi->platform_data; + + ret = mdio_device_register(mdiodev); + if (ret) { + mdio_device_free(mdiodev); + continue; + } + } + mutex_unlock(&mdio_board_lock); +} + +/** + * mdio_register_board_info - register MDIO devices for a given board + * @info: array of devices descriptors + * @n: number of descriptors provided + * Context: can sleep + * + * The board info passed can be marked with __initdata but be pointers + * such as platform_data etc. are copied as-is + */ +int mdiobus_register_board_info(const struct mdio_board_info *info, + unsigned int n) +{ + struct mdio_board_entry *be; + unsigned int i; + + be = kcalloc(n, sizeof(*be), GFP_KERNEL); + if (!be) + return -ENOMEM; + + for (i = 0; i < n; i++, be++, info++) { + memcpy(&be->board_info, info, sizeof(*info)); + mutex_lock(&mdio_board_lock); + list_add_tail(&be->list, &mdio_board_list); + mutex_unlock(&mdio_board_lock); + } + + return 0; +} diff --git a/drivers/net/phy/mdio-boardinfo.h b/drivers/net/phy/mdio-boardinfo.h new file mode 100644 index 000000000000..00f98163e90e --- /dev/null +++ b/drivers/net/phy/mdio-boardinfo.h @@ -0,0 +1,19 @@ +/* + * mdio-boardinfo.h - board info interface internal to the mdio_bus + * component + */ + +#ifndef __MDIO_BOARD_INFO_H +#define __MDIO_BOARD_INFO_H + +#include <linux/phy.h> +#include <linux/mutex.h> + +struct mdio_board_entry { + struct list_head list; + struct mdio_board_info board_info; +}; + +void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus); + +#endif /* __MDIO_BOARD_INFO_H */ diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 653d076eafe5..fa7d51f14869 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -41,6 +41,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/mdio.h> +#include "mdio-boardinfo.h" + int mdiobus_register_device(struct mdio_device *mdiodev) { if (mdiodev->bus->mdio_map[mdiodev->addr]) @@ -343,6 +345,8 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) } } + mdiobus_setup_mdiodev_from_board_info(bus); + bus->state = MDIOBUS_REGISTERED; pr_info("%s: probed\n", bus->name); return 0; diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index fc3aaaa36b1d..e24f28924af8 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -34,6 +34,17 @@ static void mdio_device_release(struct device *dev) kfree(to_mdio_device(dev)); } +int mdio_device_bus_match(struct device *dev, struct device_driver *drv) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) + return 0; + + return strcmp(mdiodev->modalias, drv->name) == 0; +} + struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr) { struct mdio_device *mdiodev; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a3711769544b..4a24b5d15f5a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2001,8 +2001,6 @@ static const struct net_device_ops team_netdev_ops = { .ndo_add_slave = team_add_slave, .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, - .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, - .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_change_carrier = team_change_carrier, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 29982c7f6080..11e28530c83c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -41,6 +41,9 @@ module_param(gso, bool, 0444); #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) #define GOOD_COPY_LEN 128 +/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ +#define VIRTIO_XDP_HEADROOM 256 + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -340,15 +343,19 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, static bool virtnet_xdp_xmit(struct virtnet_info *vi, struct receive_queue *rq, - struct send_queue *sq, struct xdp_buff *xdp, void *data) { struct virtio_net_hdr_mrg_rxbuf *hdr; unsigned int num_sg, len; + struct send_queue *sq; + unsigned int qp; void *xdp_sent; int err; + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; + /* Free up any pending old buffers before queueing new ones. */ while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (vi->mergeable_rx_bufs) { @@ -363,6 +370,7 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi, } if (vi->mergeable_rx_bufs) { + xdp->data -= sizeof(struct virtio_net_hdr_mrg_rxbuf); /* Zero header and leave csum up to XDP layers */ hdr = xdp->data; memset(hdr, 0, vi->hdr_len); @@ -379,7 +387,9 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi, num_sg = 2; sg_init_table(sq->sg, 2); sg_set_buf(sq->sg, hdr, vi->hdr_len); - skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); + skb_to_sgvec(skb, sq->sg + 1, + xdp->data - xdp->data_hard_start, + xdp->data_end - xdp->data); } err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, data, GFP_ATOMIC); @@ -398,52 +408,6 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi, return true; } -static u32 do_xdp_prog(struct virtnet_info *vi, - struct receive_queue *rq, - struct bpf_prog *xdp_prog, - void *data, int len) -{ - int hdr_padded_len; - struct xdp_buff xdp; - void *buf; - unsigned int qp; - u32 act; - - if (vi->mergeable_rx_bufs) { - hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); - xdp.data = data + hdr_padded_len; - xdp.data_end = xdp.data + (len - vi->hdr_len); - buf = data; - } else { /* small buffers */ - struct sk_buff *skb = data; - - xdp.data = skb->data; - xdp.data_end = xdp.data + len; - buf = skb->data; - } - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - switch (act) { - case XDP_PASS: - return XDP_PASS; - case XDP_TX: - qp = vi->curr_queue_pairs - - vi->xdp_queue_pairs + - smp_processor_id(); - xdp.data = buf; - if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, - data))) - trace_xdp_exception(vi->dev, xdp_prog, act); - return XDP_TX; - default: - bpf_warn_invalid_xdp_action(act); - case XDP_ABORTED: - trace_xdp_exception(vi->dev, xdp_prog, act); - case XDP_DROP: - return XDP_DROP; - } -} - static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -453,30 +417,44 @@ static struct sk_buff *receive_small(struct net_device *dev, struct bpf_prog *xdp_prog; len -= vi->hdr_len; - skb_trim(skb, len); rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + struct xdp_buff xdp; u32 act; if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags)) goto err_xdp; - act = do_xdp_prog(vi, rq, xdp_prog, skb, len); + + xdp.data_hard_start = skb->data; + xdp.data = skb->data + VIRTIO_XDP_HEADROOM; + xdp.data_end = xdp.data + len; + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { case XDP_PASS: + /* Recalculate length in case bpf program changed it */ + __skb_pull(skb, xdp.data - xdp.data_hard_start); + len = xdp.data_end - xdp.data; break; case XDP_TX: + if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, skb))) + trace_xdp_exception(vi->dev, xdp_prog, act); rcu_read_unlock(); goto xdp_xmit; - case XDP_DROP: default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(vi->dev, xdp_prog, act); + case XDP_DROP: goto err_xdp; } } rcu_read_unlock(); + skb_trim(skb, len); return skb; err_xdp: @@ -525,7 +503,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, unsigned int *len) { struct page *page = alloc_page(GFP_ATOMIC); - unsigned int page_off = 0; + unsigned int page_off = VIRTIO_XDP_HEADROOM; if (!page) return NULL; @@ -561,7 +539,8 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, put_page(p); } - *len = page_off; + /* Headroom does not contribute to packet length */ + *len = page_off - VIRTIO_XDP_HEADROOM; return page; err_buf: __free_pages(page, 0); @@ -589,6 +568,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { struct page *xdp_page; + struct xdp_buff xdp; + void *data; u32 act; /* This happens when rx buffer size is underestimated */ @@ -598,7 +579,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, page, offset, &len); if (!xdp_page) goto err_xdp; - offset = 0; + offset = VIRTIO_XDP_HEADROOM; } else { xdp_page = page; } @@ -611,28 +592,47 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; - act = do_xdp_prog(vi, rq, xdp_prog, - page_address(xdp_page) + offset, len); + /* Allow consuming headroom but reserve enough space to push + * the descriptor on if we get an XDP_TX return code. + */ + data = page_address(xdp_page) + offset; + xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; + xdp.data = data + vi->hdr_len; + xdp.data_end = xdp.data + (len - vi->hdr_len); + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { case XDP_PASS: + /* recalculate offset to account for any header + * adjustments. Note other cases do not build an + * skb and avoid using offset + */ + offset = xdp.data - + page_address(xdp_page) - vi->hdr_len; + /* We can only create skb based on xdp_page. */ if (unlikely(xdp_page != page)) { rcu_read_unlock(); put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, - 0, len, PAGE_SIZE); + offset, len, PAGE_SIZE); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); return head_skb; } break; case XDP_TX: + if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, data))) + trace_xdp_exception(vi->dev, xdp_prog, act); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); goto xdp_xmit; - case XDP_DROP: default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(vi->dev, xdp_prog, act); + case XDP_DROP: if (unlikely(xdp_page != page)) __free_pages(xdp_page, 0); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); @@ -782,23 +782,30 @@ frame_err: dev_kfree_skb(skb); } +static unsigned int virtnet_get_headroom(struct virtnet_info *vi) +{ + return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; +} + static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { + int headroom = GOOD_PACKET_LEN + virtnet_get_headroom(vi); + unsigned int xdp_headroom = virtnet_get_headroom(vi); struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; int err; - skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp); + skb = __netdev_alloc_skb_ip_align(vi->dev, headroom, gfp); if (unlikely(!skb)) return -ENOMEM; - skb_put(skb, GOOD_PACKET_LEN); + skb_put(skb, headroom); hdr = skb_vnet_hdr(skb); sg_init_table(rq->sg, 2); sg_set_buf(rq->sg, hdr, vi->hdr_len); - skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); + skb_to_sgvec(skb, rq->sg + 1, xdp_headroom, skb->len - xdp_headroom); err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp); if (err < 0) @@ -866,24 +873,27 @@ static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len) return ALIGN(len, MERGEABLE_BUFFER_ALIGN); } -static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) +static int add_recvbuf_mergeable(struct virtnet_info *vi, + struct receive_queue *rq, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; + unsigned int headroom = virtnet_get_headroom(vi); char *buf; unsigned long ctx; int err; unsigned int len, hole; len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len); - if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) + if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; + buf += headroom; /* advance address leaving hole at front of pkt */ ctx = mergeable_buf_to_ctx(buf, len); get_page(alloc_frag->page); - alloc_frag->offset += len; + alloc_frag->offset += len + headroom; hole = alloc_frag->size - alloc_frag->offset; - if (hole < len) { + if (hole < len + headroom) { /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. This extra space is not included in @@ -917,7 +927,7 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, gfp |= __GFP_COLD; do { if (vi->mergeable_rx_bufs) - err = add_recvbuf_mergeable(rq, gfp); + err = add_recvbuf_mergeable(vi, rq, gfp); else if (vi->big_packets) err = add_recvbuf_big(vi, rq, gfp); else @@ -1305,7 +1315,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) rtnl_unlock(); } -static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) +static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct scatterlist sg; struct net_device *dev = vi->dev; @@ -1331,6 +1341,16 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) return 0; } +static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) +{ + int err; + + rtnl_lock(); + err = _virtnet_set_queues(vi, queue_pairs); + rtnl_unlock(); + return err; +} + static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); @@ -1583,7 +1603,7 @@ static int virtnet_set_channels(struct net_device *dev, return -EINVAL; get_online_cpus(); - err = virtnet_set_queues(vi, queue_pairs); + err = _virtnet_set_queues(vi, queue_pairs); if (!err) { netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); @@ -1673,19 +1693,91 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .set_settings = virtnet_set_settings, }; +static void virtnet_freeze_down(struct virtio_device *vdev) +{ + struct virtnet_info *vi = vdev->priv; + int i; + + /* Make sure no work handler is accessing the device */ + flush_work(&vi->config_work); + + netif_device_detach(vi->dev); + cancel_delayed_work_sync(&vi->refill); + + if (netif_running(vi->dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) + napi_disable(&vi->rq[i].napi); + } +} + +static int init_vqs(struct virtnet_info *vi); +static void _remove_vq_common(struct virtnet_info *vi); + +static int virtnet_restore_up(struct virtio_device *vdev) +{ + struct virtnet_info *vi = vdev->priv; + int err, i; + + err = init_vqs(vi); + if (err) + return err; + + virtio_device_ready(vdev); + + if (netif_running(vi->dev)) { + for (i = 0; i < vi->curr_queue_pairs; i++) + if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(&vi->rq[i]); + } + + netif_device_attach(vi->dev); + return err; +} + +static int virtnet_reset(struct virtnet_info *vi) +{ + struct virtio_device *dev = vi->vdev; + int ret; + + virtio_config_disable(dev); + dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; + virtnet_freeze_down(dev); + _remove_vq_common(vi); + + dev->config->reset(dev); + virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); + + ret = virtio_finalize_features(dev); + if (ret) + goto err; + + ret = virtnet_restore_up(dev); + if (ret) + goto err; + ret = _virtnet_set_queues(vi, vi->curr_queue_pairs); + if (ret) + goto err; + + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + virtio_config_enable(dev); + return 0; +err: + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); + return ret; +} + static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) { unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr); struct virtnet_info *vi = netdev_priv(dev); struct bpf_prog *old_prog; - u16 xdp_qp = 0, curr_qp; + u16 oxdp_qp, xdp_qp = 0, curr_qp; int i, err; - if (prog && prog->xdp_adjust_head) { - netdev_warn(dev, "Does not support bpf_xdp_adjust_head()\n"); - return -EOPNOTSUPP; - } - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || @@ -1715,21 +1807,32 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) return -ENOMEM; } - err = virtnet_set_queues(vi, curr_qp + xdp_qp); + if (prog) { + prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) { dev_warn(&dev->dev, "XDP Device queue allocation failure.\n"); - return err; + goto virtio_queue_err; } - if (prog) { - prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); - if (IS_ERR(prog)) { - virtnet_set_queues(vi, curr_qp); - return PTR_ERR(prog); - } + oxdp_qp = vi->xdp_queue_pairs; + + /* Changing the headroom in buffers is a disruptive operation because + * existing buffers must be flushed and reallocated. This will happen + * when a xdp program is initially added or xdp is disabled by removing + * the xdp program resulting in number of XDP queues changing. + */ + if (vi->xdp_queue_pairs != xdp_qp) { + vi->xdp_queue_pairs = xdp_qp; + err = virtnet_reset(vi); + if (err) + goto virtio_reset_err; } - vi->xdp_queue_pairs = xdp_qp; netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); for (i = 0; i < vi->max_queue_pairs; i++) { @@ -1740,6 +1843,21 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) } return 0; + +virtio_reset_err: + /* On reset error do our best to unwind XDP changes inflight and return + * error up to user space for resolution. The underlying reset hung on + * us so not much we can do here. + */ + dev_warn(&dev->dev, "XDP reset failure and queues unstable\n"); + vi->xdp_queue_pairs = oxdp_qp; +virtio_queue_err: + /* On queue set error we can unwind bpf ref count and user space can + * retry this is most likely an allocation failure. + */ + if (prog) + bpf_prog_sub(prog, vi->max_queue_pairs - 1); + return err; } static bool virtnet_xdp_query(struct net_device *dev) @@ -1840,12 +1958,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) kfree(vi->sq); } -static void free_receive_bufs(struct virtnet_info *vi) +static void _free_receive_bufs(struct virtnet_info *vi) { struct bpf_prog *old_prog; int i; - rtnl_lock(); for (i = 0; i < vi->max_queue_pairs; i++) { while (vi->rq[i].pages) __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); @@ -1855,6 +1972,12 @@ static void free_receive_bufs(struct virtnet_info *vi) if (old_prog) bpf_prog_put(old_prog); } +} + +static void free_receive_bufs(struct virtnet_info *vi) +{ + rtnl_lock(); + _free_receive_bufs(vi); rtnl_unlock(); } @@ -2293,9 +2416,7 @@ static int virtnet_probe(struct virtio_device *vdev) goto free_unregister_netdev; } - rtnl_lock(); virtnet_set_queues(vi, vi->curr_queue_pairs); - rtnl_unlock(); /* Assume link up if device can't report link status, otherwise get link status from config. */ @@ -2327,6 +2448,15 @@ free: return err; } +static void _remove_vq_common(struct virtnet_info *vi) +{ + vi->vdev->config->reset(vi->vdev); + free_unused_bufs(vi); + _free_receive_bufs(vi); + free_receive_page_frags(vi); + virtnet_del_vqs(vi); +} + static void remove_vq_common(struct virtnet_info *vi) { vi->vdev->config->reset(vi->vdev); @@ -2362,21 +2492,9 @@ static void virtnet_remove(struct virtio_device *vdev) static int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int i; virtnet_cpu_notif_remove(vi); - - /* Make sure no work handler is accessing the device */ - flush_work(&vi->config_work); - - netif_device_detach(vi->dev); - cancel_delayed_work_sync(&vi->refill); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) - napi_disable(&vi->rq[i].napi); - } - + virtnet_freeze_down(vdev); remove_vq_common(vi); return 0; @@ -2385,28 +2503,12 @@ static int virtnet_freeze(struct virtio_device *vdev) static int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err, i; + int err; - err = init_vqs(vi); + err = virtnet_restore_up(vdev); if (err) return err; - - virtio_device_ready(vdev); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(&vi->rq[i]); - } - - netif_device_attach(vi->dev); - - rtnl_lock(); virtnet_set_queues(vi, vi->curr_queue_pairs); - rtnl_unlock(); err = virtnet_cpu_notif_add(vi); if (err) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 264fc1585b3c..630eafdb79e8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -378,6 +378,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return ret; @@ -574,8 +575,10 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (!IS_ERR(neigh)) + if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); + } rcu_read_unlock_bh(); err: diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 9d9b4e0def2a..1f6bc8791d51 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -241,7 +241,6 @@ static struct spi_driver slic_ds26522_driver = { .driver = { .name = "ds26522", .bus = &spi_bus_type, - .owner = THIS_MODULE, .of_match_table = slic_ds26522_match, }, .probe = slic_ds26522_probe, @@ -249,15 +248,4 @@ static struct spi_driver slic_ds26522_driver = { .id_table = slic_ds26522_id, }; -static int __init slic_ds26522_init(void) -{ - return spi_register_driver(&slic_ds26522_driver); -} - -static void __exit slic_ds26522_exit(void) -{ - spi_unregister_driver(&slic_ds26522_driver); -} - -module_init(slic_ds26522_init); -module_exit(slic_ds26522_exit); +module_spi_driver(slic_ds26522_driver); diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index c639d5a02656..4b9e7761a97b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2282,7 +2282,7 @@ static int _bnx2fc_create(struct net_device *netdev, } /* obtain physical netdev */ - if (netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(netdev)) phys_dev = vlan_dev_real_dev(netdev); /* verify if the physical device is a netxtreme2 device */ @@ -2320,7 +2320,7 @@ static int _bnx2fc_create(struct net_device *netdev, goto ifput_err; } - if (netdev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(netdev)) { vlan_id = vlan_dev_vlan_id(netdev); interface->vlan_enabled = 1; } @@ -2538,7 +2538,7 @@ static bool bnx2fc_match(struct net_device *netdev) struct net_device *phys_dev = netdev; mutex_lock(&bnx2fc_dev_lock); - if (netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(netdev)) phys_dev = vlan_dev_real_dev(netdev); if (bnx2fc_hba_lookup(phys_dev)) { diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 9167bcd9fffe..bd7d39ecbd24 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -223,7 +223,7 @@ struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev, struct cxgbi_device *cdev, *tmp; int i; - if (ndev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(ndev)) { vdev = ndev; ndev = vlan_dev_real_dev(ndev); log_debug(1 << CXGBI_DBG_DEV, @@ -256,7 +256,7 @@ struct cxgbi_device *cxgbi_device_find_by_netdev_rcu(struct net_device *ndev, struct cxgbi_device *cdev; int i; - if (ndev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(ndev)) { vdev = ndev; ndev = vlan_dev_real_dev(ndev); pr_info("vlan dev %s -> %s.\n", vdev->name, ndev->name); @@ -290,7 +290,7 @@ static struct cxgbi_device *cxgbi_device_find_by_mac(struct net_device *ndev, struct cxgbi_device *cdev, *tmp; int i; - if (ndev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(ndev)) { vdev = ndev; ndev = vlan_dev_real_dev(ndev); pr_info("vlan dev %s -> %s.\n", vdev->name, ndev->name); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 59150cad0353..79160ffae483 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -326,8 +326,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, /* look for SAN MAC address, if multiple SAN MACs exist, only * use the first one for SPMA */ - real_dev = (netdev->priv_flags & IFF_802_1Q_VLAN) ? - vlan_dev_real_dev(netdev) : netdev; + real_dev = is_vlan_dev(netdev) ? vlan_dev_real_dev(netdev) : netdev; fcoe->realdev = real_dev; rcu_read_lock(); for_each_dev_addr(real_dev, ha) { @@ -730,7 +729,7 @@ static int fcoe_netdev_config(struct fc_lport *lport, struct net_device *netdev) ctlr = fcoe_to_ctlr(fcoe); /* Figure out the VLAN ID, if any */ - if (netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(netdev)) lport->vlan = vlan_dev_vlan_id(netdev); else lport->vlan = 0; @@ -959,13 +958,13 @@ static inline int fcoe_em_config(struct fc_lport *lport) * Reuse existing offload em instance in case * it is already allocated on real eth device */ - if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(fcoe->netdev)) cur_real_dev = vlan_dev_real_dev(fcoe->netdev); else cur_real_dev = fcoe->netdev; list_for_each_entry(oldfcoe, &fcoe_hostlist, list) { - if (oldfcoe->netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(oldfcoe->netdev)) old_real_dev = vlan_dev_real_dev(oldfcoe->netdev); else old_real_dev = oldfcoe->netdev; @@ -1563,7 +1562,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp) skb->protocol = htons(ETH_P_FCOE); skb->priority = fcoe->priority; - if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN && + if (is_vlan_dev(fcoe->netdev) && fcoe->realdev->features & NETIF_F_HW_VLAN_CTAG_TX) { /* must set skb->dev before calling vlan_put_tag */ skb->dev = fcoe->realdev; @@ -1794,7 +1793,7 @@ fcoe_hostlist_lookup_realdev_port(struct net_device *netdev) struct net_device *real_dev; list_for_each_entry(fcoe, &fcoe_hostlist, list) { - if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(fcoe->netdev)) real_dev = vlan_dev_real_dev(fcoe->netdev); else real_dev = fcoe->netdev; diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7062bb0975a5..400d70b69379 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -100,11 +100,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) dev->id.device, dev->id.vendor); } -static void add_status(struct virtio_device *dev, unsigned status) -{ - dev->config->set_status(dev, dev->config->get_status(dev) | status); -} - void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit) { @@ -145,14 +140,15 @@ void virtio_config_changed(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_config_changed); -static void virtio_config_disable(struct virtio_device *dev) +void virtio_config_disable(struct virtio_device *dev) { spin_lock_irq(&dev->config_lock); dev->config_enabled = false; spin_unlock_irq(&dev->config_lock); } +EXPORT_SYMBOL_GPL(virtio_config_disable); -static void virtio_config_enable(struct virtio_device *dev) +void virtio_config_enable(struct virtio_device *dev) { spin_lock_irq(&dev->config_lock); dev->config_enabled = true; @@ -161,8 +157,15 @@ static void virtio_config_enable(struct virtio_device *dev) dev->config_change_pending = false; spin_unlock_irq(&dev->config_lock); } +EXPORT_SYMBOL_GPL(virtio_config_enable); + +void virtio_add_status(struct virtio_device *dev, unsigned int status) +{ + dev->config->set_status(dev, dev->config->get_status(dev) | status); +} +EXPORT_SYMBOL_GPL(virtio_add_status); -static int virtio_finalize_features(struct virtio_device *dev) +int virtio_finalize_features(struct virtio_device *dev) { int ret = dev->config->finalize_features(dev); unsigned status; @@ -173,7 +176,7 @@ static int virtio_finalize_features(struct virtio_device *dev) if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) return 0; - add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + virtio_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); status = dev->config->get_status(dev); if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { dev_err(&dev->dev, "virtio: device refuses features: %x\n", @@ -182,6 +185,7 @@ static int virtio_finalize_features(struct virtio_device *dev) } return 0; } +EXPORT_SYMBOL_GPL(virtio_finalize_features); static int virtio_dev_probe(struct device *_d) { @@ -193,7 +197,7 @@ static int virtio_dev_probe(struct device *_d) u64 driver_features_legacy; /* We have a driver! */ - add_status(dev, VIRTIO_CONFIG_S_DRIVER); + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); /* Figure out what features the device supports. */ device_features = dev->config->get_features(dev); @@ -247,7 +251,7 @@ static int virtio_dev_probe(struct device *_d) return 0; err: - add_status(dev, VIRTIO_CONFIG_S_FAILED); + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; } @@ -265,7 +269,7 @@ static int virtio_dev_remove(struct device *_d) WARN_ON_ONCE(dev->config->get_status(dev)); /* Acknowledge the device's existence again. */ - add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); return 0; } @@ -316,7 +320,7 @@ int register_virtio_device(struct virtio_device *dev) dev->config->reset(dev); /* Acknowledge that we've seen the device. */ - add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); INIT_LIST_HEAD(&dev->vqs); @@ -325,7 +329,7 @@ int register_virtio_device(struct virtio_device *dev) err = device_register(&dev->dev); out: if (err) - add_status(dev, VIRTIO_CONFIG_S_FAILED); + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; } EXPORT_SYMBOL_GPL(register_virtio_device); @@ -365,18 +369,18 @@ int virtio_device_restore(struct virtio_device *dev) dev->config->reset(dev); /* Acknowledge that we've seen the device. */ - add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); /* Maybe driver failed before freeze. * Restore the failed status, for debugging. */ if (dev->failed) - add_status(dev, VIRTIO_CONFIG_S_FAILED); + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); if (!drv) return 0; /* We have a driver! */ - add_status(dev, VIRTIO_CONFIG_S_DRIVER); + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); ret = virtio_finalize_features(dev); if (ret) @@ -389,14 +393,14 @@ int virtio_device_restore(struct virtio_device *dev) } /* Finally, tell the device we're all set */ - add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); virtio_config_enable(dev); return 0; err: - add_status(dev, VIRTIO_CONFIG_S_FAILED); + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; } EXPORT_SYMBOL_GPL(virtio_device_restore); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 5881d1fdc1fb..55e517130311 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -33,6 +33,7 @@ #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 +#define PHY_ID_BCM74371 0xae0252e0 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h new file mode 100644 index 000000000000..cb31683bbe15 --- /dev/null +++ b/include/linux/can/rx-offload.h @@ -0,0 +1,59 @@ +/* + * linux/can/rx-offload.h + * + * Copyright (c) 2014 David Jander, Protonic Holland + * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CAN_RX_OFFLOAD_H +#define _CAN_RX_OFFLOAD_H + +#include <linux/netdevice.h> +#include <linux/can.h> + +struct can_rx_offload { + struct net_device *dev; + + unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, + u32 *timestamp, unsigned int mb); + + struct sk_buff_head skb_queue; + u32 skb_queue_len_max; + + unsigned int mb_first; + unsigned int mb_last; + + struct napi_struct napi; + + bool inc; +}; + +int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload); +int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); +int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); +int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +void can_rx_offload_reset(struct can_rx_offload *offload); +void can_rx_offload_del(struct can_rx_offload *offload); +void can_rx_offload_enable(struct can_rx_offload *offload); + +static inline void can_rx_offload_schedule(struct can_rx_offload *offload) +{ + napi_schedule(&offload->napi); +} + +static inline void can_rx_offload_disable(struct can_rx_offload *offload) +{ + napi_disable(&offload->napi); +} + +#endif /* !_CAN_RX_OFFLOAD_H */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 55a80d73cfc1..ca08ab16ecdc 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -10,6 +10,7 @@ #define __LINUX_MDIO_H__ #include <uapi/linux/mdio.h> +#include <linux/mod_devicetable.h> struct mii_bus; @@ -29,6 +30,7 @@ struct mdio_device { const struct dev_pm_ops *pm_ops; struct mii_bus *bus; + char modalias[MDIO_NAME_SIZE]; int (*bus_match)(struct device *dev, struct device_driver *drv); void (*device_free)(struct mdio_device *mdiodev); @@ -71,6 +73,7 @@ int mdio_device_register(struct mdio_device *mdiodev); void mdio_device_remove(struct mdio_device *mdiodev); int mdio_driver_register(struct mdio_driver *drv); void mdio_driver_unregister(struct mdio_driver *drv); +int mdio_device_bus_match(struct device *dev, struct device_driver *drv); static inline bool mdio_phy_id_is_c45(int phy_id) { diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8a57f0b1242d..8850fcaf50db 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -501,6 +501,7 @@ struct platform_device_id { kernel_ulong_t driver_data; }; +#define MDIO_NAME_SIZE 32 #define MDIO_MODULE_PREFIX "mdio:" #define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 014fbe256d55..58afbd1cc659 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3888,10 +3888,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); -int netdev_default_l2upper_neigh_construct(struct net_device *dev, - struct neighbour *n); -void netdev_default_l2upper_neigh_destroy(struct net_device *dev, - struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/include/linux/phy.h b/include/linux/phy.h index 43474f39ef65..d9bdf53e0514 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -81,6 +81,9 @@ typedef enum { PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, + PHY_INTERFACE_MODE_1000BASEX, + PHY_INTERFACE_MODE_2500BASEX, + PHY_INTERFACE_MODE_RXAUI, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -141,6 +144,12 @@ static inline const char *phy_modes(phy_interface_t interface) return "qsgmii"; case PHY_INTERFACE_MODE_TRGMII: return "trgmii"; + case PHY_INTERFACE_MODE_1000BASEX: + return "1000base-x"; + case PHY_INTERFACE_MODE_2500BASEX: + return "2500base-x"; + case PHY_INTERFACE_MODE_RXAUI: + return "rxaui"; default: return "unknown"; } @@ -877,6 +886,25 @@ void mdio_bus_exit(void); extern struct bus_type mdio_bus_type; +struct mdio_board_info { + const char *bus_id; + char modalias[MDIO_NAME_SIZE]; + int mdio_addr; + const void *platform_data; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int mdiobus_register_board_info(const struct mdio_board_info *info, + unsigned int n); +#else +static inline int mdiobus_register_board_info(const struct mdio_board_info *i, + unsigned int n) +{ + return 0; +} +#endif + + /** * module_phy_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6a78e1892b6..f1adddc1c5ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @dst_pending_confirm: need to confirm neighbour * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -741,6 +742,7 @@ struct sk_buff { __u8 csum_level:2; __u8 csum_bad:1; + __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } +static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val) +{ + skb->dst_pending_confirm = val; +} + +static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) +{ + return skb->dst_pending_confirm != 0; +} + static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d5eb5479a425..04b0d3f95043 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -132,12 +132,16 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) return container_of(_dev, struct virtio_device, dev); } +void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); +void virtio_config_disable(struct virtio_device *dev); +void virtio_config_enable(struct virtio_device *dev); +int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..65619a2de6f4 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 return n; } +static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, diff --git a/include/net/dsa.h b/include/net/dsa.h index 2cb77e64d648..b49b2004891e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -13,6 +13,7 @@ #include <linux/if_ether.h> #include <linux/list.h> +#include <linux/notifier.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> @@ -44,6 +45,11 @@ struct dsa_chip_data { struct device *host_dev; int sw_addr; + /* + * Reference to network devices + */ + struct device *netdev[DSA_MAX_PORTS]; + /* set to size of eeprom if supported by the switch */ int eeprom_len; @@ -92,6 +98,9 @@ struct packet_type; struct dsa_switch_tree { struct list_head list; + /* Notifier chain for switch-wide events */ + struct raw_notifier_head nh; + /* Tree identifier */ u32 tree; @@ -166,6 +175,7 @@ struct dsa_mall_tc_entry { struct dsa_port { struct dsa_switch *ds; unsigned int index; + const char *name; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; @@ -182,6 +192,9 @@ struct dsa_switch { struct dsa_switch_tree *dst; int index; + /* Listener for switch fabric events */ + struct notifier_block nb; + /* * Give the switch driver somewhere to hang its private data * structure. @@ -261,6 +274,16 @@ struct switchdev_obj_port_fdb; struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; +#define DSA_NOTIFIER_BRIDGE_JOIN 1 +#define DSA_NOTIFIER_BRIDGE_LEAVE 2 + +/* DSA_NOTIFIER_BRIDGE_* */ +struct dsa_notifier_bridge_info { + struct net_device *br; + int sw_index; + int port; +}; + struct dsa_switch_ops { /* * Probing and setup. @@ -428,6 +451,7 @@ struct dsa_switch_driver { void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); +struct net_device *dsa_dev_to_net_device(struct device *dev); static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { diff --git a/include/net/dst.h b/include/net/dst.h index 6835d224d47b..84a1043dd6a1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0100 #define DST_METADATA 0x0200 - unsigned short pending_confirm; - short error; /* A non-zero value of dst->obsolete forces by-hand validation @@ -78,6 +76,8 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ + unsigned short __pad3; + #ifdef CONFIG_IP_ROUTE_CLASSID __u32 tclassid; #else @@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - dst->pending_confirm = 1; } static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, @@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, { const struct hh_cache *hh; - if (dst->pending_confirm) { - unsigned long now = jiffies; - - dst->pending_confirm = 0; - /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - } - hh = &n->hh; if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) return neigh_hh_output(hh, skb); @@ -477,6 +467,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst return IS_ERR(n) ? NULL : n; } +static inline void dst_confirm_neigh(const struct dst_entry *dst, + const void *daddr) +{ + if (dst->ops->confirm_neigh) + dst->ops->confirm_neigh(dst, daddr); +} + static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 8a2b66d8d78d..c84b3287e38b 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -33,6 +33,8 @@ struct dst_ops { struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); + void (*confirm_neigh)(const struct dst_entry *dst, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d562a2fe4860..8a0214654b6b 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -391,6 +391,23 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons return n; } +static inline void __ipv6_confirm_neigh(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + int ndisc_init(void); int ndisc_late_init(void); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3cfd365bcfbc..480b65a24aff 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -593,10 +593,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && !dst_check(t->dst, t->dst_cookie)) { - dst_release(t->dst); - t->dst = NULL; - } + if (t->dst && !dst_check(t->dst, t->dst_cookie)) + sctp_transport_dst_release(t); return t->dst; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 231fa9ac50bd..6a685049f67f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -804,6 +804,8 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + __u32 dst_pending_confirm; /* need to confirm neighbour */ + /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -950,6 +952,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); void sctp_transport_immediate_rtx(struct sctp_transport *); +void sctp_transport_dst_release(struct sctp_transport *t); +void sctp_transport_dst_confirm(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/include/net/sock.h b/include/net/sock.h index 94e65fd70354..6f83e78eaa5a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -240,6 +240,7 @@ struct sock_common { * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache + * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -393,6 +394,8 @@ struct sock { struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; + __u32 sk_dst_pending_confirm; + /* Note: 32bit hole on 64bit arches */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -1764,6 +1767,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; } } } @@ -1774,6 +1778,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; /* * This can be called while sk is owned by the caller only, * with no state that can be checked in a rcu_dereference_check() cond @@ -1789,6 +1794,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -1809,6 +1815,26 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +static inline void sk_dst_confirm(struct sock *sk) +{ + if (!sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 1; +} + +static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) +{ + if (skb_get_dst_pending_confirm(skb)) { + struct sock *sk = skb->sk; + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + if (sk && sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 0; + } +} + bool sk_mc_loop(struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 1beab5532035..4b34c51f859e 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -160,8 +160,7 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { - return dev->priv_flags & IFF_802_1Q_VLAN ? - vlan_dev_vlan_id(dev) : 0xffff; + return is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 0xffff; } static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) @@ -326,8 +325,7 @@ static inline u16 rdma_get_vlan_id(union ib_gid *dgid) static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { - return dev->priv_flags & IFF_802_1Q_VLAN ? - vlan_dev_real_dev(dev) : NULL; + return is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : NULL; } #endif /* IB_ADDR_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fb3513b35c0b..1a754e5d2695 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1397,7 +1397,7 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env, imm = insn->imm; add_imm: - if (imm <= 0) { + if (imm < 0) { verbose("addition of negative constant to packet pointer is not allowed\n"); return -EACCES; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 116455ac3db5..e97ab824e368 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -791,8 +791,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, - .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, - .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6c46d1b4cdbb..d208ee9ab60a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -347,8 +347,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, - .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, - .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, @@ -413,4 +411,5 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); + INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e4a4176171c9..5028691fa68a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) if (f->added_by_external_learn) fdb_del_external_learn(f); - hlist_del_rcu(&f->hlist); + hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); } @@ -290,34 +290,43 @@ out: spin_unlock_bh(&br->hash_lock); } -void br_fdb_cleanup(unsigned long _data) +void br_fdb_cleanup(struct work_struct *work) { - struct net_bridge *br = (struct net_bridge *)_data; + struct net_bridge *br = container_of(work, struct net_bridge, + gc_work.work); unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->ageing_time; + unsigned long work_delay = delay; + unsigned long now = jiffies; int i; - spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; struct hlist_node *n; + if (!br->hash[i].first) + continue; + + spin_lock_bh(&br->hash_lock); hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; + if (f->is_static) continue; if (f->added_by_external_learn) continue; this_timer = f->updated + delay; - if (time_before_eq(this_timer, jiffies)) + if (time_after(this_timer, now)) + work_delay = min(work_delay, this_timer - now); + else fdb_delete(br, f); - else if (time_before(this_timer, next_timer)) - next_timer = this_timer; } + spin_unlock_bh(&br->hash_lock); + cond_resched(); } - spin_unlock(&br->hash_lock); - mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); + /* Cleanup minimum 10 milliseconds apart */ + work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); + mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } /* Completely flush all dynamic entries in forwarding database.*/ @@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, &br->hash[br_mac_hash(addr, vid)], hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) { - if (unlikely(has_expired(br, fdb))) - break; return fdb; } } @@ -590,7 +597,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->dst = source; fdb_modified = true; } - fdb->updated = jiffies; + if (jiffies != fdb->updated) + fdb->updated = jiffies; if (unlikely(added_by_user)) fdb->added_by_user = 1; if (unlikely(fdb_modified)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed0dd3340084..8ac1770aa222 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_vlan_flush(br); br_multicast_dev_del(br); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); br_sysfs_delbr(br->dev); unregister_netdevice_queue(br->dev, head); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index fba38d8a1a08..220943f920d2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -198,7 +198,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (dst->is_local) return br_pass_frame_up(skb); - dst->used = jiffies; + if (jiffies != dst->used) + dst->used = jiffies; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index da8157c57eb1..7970f8540cbb 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) b.hello_timer_value = br_timer_value(&br->hello_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); - b.gc_timer_value = br_timer_value(&br->gc_timer); + b.gc_timer_value = br_timer_value(&br->gc_work.timer); rcu_read_unlock(); if (copy_to_user((void __user *)args[1], &b, sizeof(b))) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fc5d885dbb22..1cbdc5b96aa7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = br_timer_value(&br->gc_timer); + clockval = br_timer_value(&br->gc_work.timer); if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 40177df45ba6..1cbbf63a5ef7 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -160,19 +160,21 @@ struct net_bridge_vlan_group { u16 pvid; }; -struct net_bridge_fdb_entry -{ +struct net_bridge_fdb_entry { struct hlist_node hlist; struct net_bridge_port *dst; - unsigned long updated; - unsigned long used; mac_addr addr; __u16 vlan_id; unsigned char is_local:1, is_static:1, added_by_user:1, added_by_external_learn:1; + + /* write-heavy members should not affect lookups */ + unsigned long updated ____cacheline_aligned_in_smp; + unsigned long used; + struct rcu_head rcu; }; @@ -212,12 +214,16 @@ struct net_bridge_mdb_htable u32 ver; }; -struct net_bridge_port -{ +struct net_bridge_port { struct net_bridge *br; struct net_device *dev; struct list_head list; + unsigned long flags; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + /* STP */ u8 priority; u8 state; @@ -238,8 +244,6 @@ struct net_bridge_port struct kobject kobj; struct rcu_head rcu; - unsigned long flags; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) @@ -259,9 +263,6 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; -#endif #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif @@ -283,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct net_bridge -{ +struct net_bridge { spinlock_t lock; + spinlock_t hash_lock; struct list_head port_list; struct net_device *dev; - struct pcpu_sw_netstats __percpu *stats; - spinlock_t hash_lock; + /* These fields are accessed on each packet */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + u8 vlan_stats_enabled; + __be16 vlan_proto; + u16 default_pvid; + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { @@ -308,6 +316,9 @@ struct net_bridge bridge_id designated_root; bridge_id bridge_id; u32 root_path_cost; + unsigned char topology_change; + unsigned char topology_change_detected; + u16 root_port; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; @@ -319,7 +330,6 @@ struct net_bridge u8 group_addr[ETH_ALEN]; bool group_addr_set; - u16 root_port; enum { BR_NO_STP, /* no spanning tree */ @@ -327,9 +337,6 @@ struct net_bridge BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; - unsigned char topology_change; - unsigned char topology_change_detected; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING unsigned char multicast_router; @@ -374,21 +381,13 @@ struct net_bridge struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; - struct timer_list gc_timer; + struct delayed_work gc_work; struct kobject *ifobj; u32 auto_cnt; #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif - -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; - u8 vlan_enabled; - u8 vlan_stats_enabled; - __be16 vlan_proto; - u16 default_pvid; -#endif }; struct br_input_skb_cb { @@ -505,7 +504,7 @@ void br_fdb_find_delete_local(struct net_bridge *br, const unsigned char *addr, u16 vid); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); -void br_fdb_cleanup(unsigned long arg); +void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 71fd1a4e63cc..8f56c2d1f1a7 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) br->ageing_time = t; spin_unlock_bh(&br->lock); - mod_timer(&br->gc_timer, jiffies); + mod_delayed_work(system_long_wq, &br->gc_work, 0); return 0; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 6c1e21411125..08341d2aa9c9 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br) spin_lock_bh(&br->lock); if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, jiffies + br->hello_time); - mod_timer(&br->gc_timer, jiffies + HZ/10); + mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); br_config_bpdu_generation(br); @@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br) del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); del_timer_sync(&br->tcn_timer); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7ddb38e0a06e..c98b3e5c140a 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br) setup_timer(&br->topology_change_timer, br_topology_change_timer_expired, (unsigned long) br); - - setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); } void br_stp_port_timer_init(struct net_bridge_port *p) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index a18148213b08..0f4034934d56 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); } static DEVICE_ATTR_RO(gc_timer); diff --git a/net/core/dev.c b/net/core/dev.c index 404d2e6d5d32..3e1a60102e64 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6111,50 +6111,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev, } EXPORT_SYMBOL(netdev_lower_state_changed); -int netdev_default_l2upper_neigh_construct(struct net_device *dev, - struct neighbour *n) -{ - struct net_device *lower_dev, *stop_dev; - struct list_head *iter; - int err; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - if (!lower_dev->netdev_ops->ndo_neigh_construct) - continue; - err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n); - if (err) { - stop_dev = lower_dev; - goto rollback; - } - } - return 0; - -rollback: - netdev_for_each_lower_dev(dev, lower_dev, iter) { - if (lower_dev == stop_dev) - break; - if (!lower_dev->netdev_ops->ndo_neigh_destroy) - continue; - lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); - } - return err; -} -EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct); - -void netdev_default_l2upper_neigh_destroy(struct net_device *dev, - struct neighbour *n) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - if (!lower_dev->netdev_ops->ndo_neigh_destroy) - continue; - lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); - } -} -EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy); - static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/dst.c b/net/core/dst.c index b5cbbe07f786..960e503b5a52 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; - dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); diff --git a/net/core/sock.c b/net/core/sock.c index 8b35debfe454..b74356535559 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; @@ -1519,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; + newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; atomic_set(&newsk->sk_drops, 0); diff --git a/net/dsa/Makefile b/net/dsa/Makefile index a3380ed0e0be..72912982de3d 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,7 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o slave.o dsa2.o +dsa_core-y += dsa.o slave.o dsa2.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 619e57a44d1d..b6d4f6a23f06 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -275,6 +275,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) return ret; + ret = dsa_switch_register_notifier(ds); + if (ret) + return ret; + if (ops->set_addr) { ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) @@ -400,6 +404,8 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); } #ifdef CONFIG_PM_SLEEP @@ -486,7 +492,7 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); -static struct net_device *dev_to_net_device(struct device *dev) +struct net_device *dsa_dev_to_net_device(struct device *dev) { struct device *d; @@ -503,6 +509,7 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); #ifdef CONFIG_OF static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, @@ -811,7 +818,7 @@ static int dsa_probe(struct platform_device *pdev) dev = pd->of_netdev; dev_hold(dev); } else { - dev = dev_to_net_device(pd->netdev); + dev = dsa_dev_to_net_device(pd->netdev); } if (dev == NULL) { ret = -EPROBE_DEFER; @@ -903,10 +910,6 @@ static struct packet_type dsa_pack_type __read_mostly = { .func = dsa_switch_rcv, }; -static struct notifier_block dsa_netdevice_nb __read_mostly = { - .notifier_call = dsa_slave_netdevice_event, -}; - #ifdef CONFIG_PM_SLEEP static int dsa_suspend(struct device *d) { @@ -964,7 +967,9 @@ static int __init dsa_init_module(void) { int rc; - register_netdevice_notifier(&dsa_netdevice_nb); + rc = dsa_slave_register_notifier(); + if (rc) + return rc; rc = platform_driver_register(&dsa_driver); if (rc) @@ -978,7 +983,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { - unregister_netdevice_notifier(&dsa_netdevice_nb); + dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9f8cc26be9ea..6f5f0a2ad256 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -78,19 +78,28 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst, kref_put(&dst->refcount, dsa_free_dst); } +/* For platform data configurations, we need to have a valid name argument to + * differentiate a disabled port from an enabled one + */ static bool dsa_port_is_valid(struct dsa_port *port) { - return !!port->dn; + return !!(port->dn || port->name); } static bool dsa_port_is_dsa(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "link", 0); + if (port->name && !strcmp(port->name, "dsa")) + return true; + else + return !!of_parse_phandle(port->dn, "link", 0); } static bool dsa_port_is_cpu(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "ethernet", 0); + if (port->name && !strcmp(port->name, "cpu")) + return true; + else + return !!of_parse_phandle(port->dn, "ethernet", 0); } static bool dsa_ds_find_port_dn(struct dsa_switch *ds, @@ -250,10 +259,11 @@ static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, static int dsa_user_port_apply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { - const char *name; + const char *name = port->name; int err; - name = of_get_property(port->dn, "label", NULL); + if (port->dn) + name = of_get_property(port->dn, "label", NULL); if (!name) name = "eth%d"; @@ -294,6 +304,10 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; + err = dsa_switch_register_notifier(ds); + if (err) + return err; + if (ds->ops->set_addr) { err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) @@ -364,6 +378,8 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); } static int dsa_dst_apply(struct dsa_switch_tree *dst) @@ -438,11 +454,16 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct net_device *ethernet_dev; struct device_node *ethernet; - ethernet = of_parse_phandle(port->dn, "ethernet", 0); - if (!ethernet) - return -EINVAL; + if (port->dn) { + ethernet = of_parse_phandle(port->dn, "ethernet", 0); + if (!ethernet) + return -EINVAL; + ethernet_dev = of_find_net_device_by_node(ethernet); + } else { + ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); + dev_put(ethernet_dev); + } - ethernet_dev = of_find_net_device_by_node(ethernet); if (!ethernet_dev) return -EPROBE_DEFER; @@ -545,6 +566,33 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) return 0; } +static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +{ + bool valid_name_found = false; + unsigned int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + if (!cd->port_names[i]) + continue; + + ds->ports[i].name = cd->port_names[i]; + + /* Initialize enabled_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + if (!dsa_port_is_cpu(&ds->ports[i])) + ds->enabled_port_mask |= 1 << i; + + valid_name_found = true; + } + + if (!valid_name_found && i == DSA_MAX_PORTS) + return -EINVAL; + + return 0; +} + static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) { int err; @@ -569,6 +617,18 @@ static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) return 0; } +static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) +{ + if (!pd) + return -ENODEV; + + /* We do not support complex trees with dsa_chip_data */ + *tree = 0; + *index = 0; + + return 0; +} + static struct device_node *dsa_get_ports(struct dsa_switch *ds, struct device_node *np) { @@ -585,23 +645,34 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) { + struct dsa_chip_data *pdata = dev->platform_data; struct device_node *np = dev->of_node; struct dsa_switch_tree *dst; struct device_node *ports; u32 tree, index; int i, err; - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; + if (np) { + err = dsa_parse_member_dn(np, &tree, &index); + if (err) + return err; - ports = dsa_get_ports(ds, np); - if (IS_ERR(ports)) - return PTR_ERR(ports); + ports = dsa_get_ports(ds, np); + if (IS_ERR(ports)) + return PTR_ERR(ports); - err = dsa_parse_ports_dn(ports, ds); - if (err) - return err; + err = dsa_parse_ports_dn(ports, ds); + if (err) + return err; + } else { + err = dsa_parse_member(pdata, &tree, &index); + if (err) + return err; + + err = dsa_parse_ports(pdata, ds); + if (err) + return err; + } dst = dsa_get_dst(tree); if (!dst) { @@ -617,6 +688,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) ds->dst = dst; ds->index = index; + ds->cd = pdata; /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index a5509b765fc0..0706a511244e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -63,8 +63,12 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); -int dsa_slave_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr); +int dsa_slave_register_notifier(void); +void dsa_slave_unregister_notifier(void); + +/* switch.c */ +int dsa_switch_register_notifier(struct dsa_switch *ds); +void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 09fc3e9462c1..061a49c29cef 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -27,6 +27,17 @@ static bool dsa_slave_dev_check(struct net_device *dev); +static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct raw_notifier_head *nh = &p->dp->ds->dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -74,9 +85,12 @@ static inline bool dsa_port_is_bridged(struct dsa_port *dp) return !!dp->bridge_dev; } -static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) +static void dsa_slave_set_state(struct net_device *dev, u8 state) { - struct dsa_port *dp = &ds->ports[port]; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; + struct dsa_switch *ds = dp->ds; + int port = dp->index; if (ds->ops->port_stp_state_set) ds->ops->port_stp_state_set(ds, port, state); @@ -133,7 +147,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - dsa_port_set_stp_state(ds, p->dp->index, stp_state); + dsa_slave_set_state(dev, stp_state); if (p->phy) phy_start(p->phy); @@ -175,7 +189,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->dp->index, p->phy); - dsa_port_set_stp_state(ds, p->dp->index, BR_STATE_DISABLED); + dsa_slave_set_state(dev, BR_STATE_DISABLED); return 0; } @@ -382,7 +396,7 @@ static int dsa_slave_stp_state_set(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - dsa_port_set_stp_state(ds, p->dp->index, attr->u.stp_state); + dsa_slave_set_state(dev, attr->u.stp_state); return 0; } @@ -559,32 +573,51 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int ret = -EOPNOTSUPP; + struct dsa_notifier_bridge_info info = { + .sw_index = p->dp->ds->index, + .port = p->dp->index, + .br = br, + }; + int err; + /* Here the port is already bridged. Reflect the current configuration + * so that drivers can program their chips accordingly. + */ p->dp->bridge_dev = br; - if (ds->ops->port_bridge_join) - ret = ds->ops->port_bridge_join(ds, p->dp->index, br); + err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info); - return ret == -EOPNOTSUPP ? 0 : ret; + /* The bridging is rolled back on error */ + if (err) + p->dp->bridge_dev = NULL; + + return err; } static void dsa_slave_bridge_port_leave(struct net_device *dev, struct net_device *br) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_notifier_bridge_info info = { + .sw_index = p->dp->ds->index, + .port = p->dp->index, + .br = br, + }; + int err; + /* Here the port is already unbridged. Reflect the current configuration + * so that drivers can program their chips accordingly. + */ p->dp->bridge_dev = NULL; - if (ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, p->dp->index, br); + err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + if (err) + netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_port_set_stp_state(ds, p->dp->index, BR_STATE_FORWARDING); + dsa_slave_set_state(dev, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -1491,46 +1524,52 @@ static bool dsa_slave_dev_check(struct net_device *dev) return dev->netdev_ops == &dsa_slave_netdev_ops; } -static int dsa_slave_port_upper_event(struct net_device *dev, - unsigned long event, void *ptr) +static int dsa_slave_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { - struct netdev_notifier_changeupper_info *info = ptr; - struct net_device *upper = info->upper_dev; - int err = 0; + int err = NOTIFY_DONE; - switch (event) { - case NETDEV_CHANGEUPPER: - if (netif_is_bridge_master(upper)) { - if (info->linking) - err = dsa_slave_bridge_port_join(dev, upper); - else - dsa_slave_bridge_port_leave(dev, upper); + if (netif_is_bridge_master(info->upper_dev)) { + if (info->linking) { + err = dsa_slave_bridge_port_join(dev, info->upper_dev); + err = notifier_from_errno(err); + } else { + dsa_slave_bridge_port_leave(dev, info->upper_dev); + err = NOTIFY_OK; } - - break; } - return notifier_from_errno(err); + return err; } -static int dsa_slave_port_event(struct net_device *dev, unsigned long event, - void *ptr) +static int dsa_slave_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) { - switch (event) { - case NETDEV_CHANGEUPPER: - return dsa_slave_port_upper_event(dev, event, ptr); - } + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->netdev_ops != &dsa_slave_netdev_ops) + return NOTIFY_DONE; + + if (event == NETDEV_CHANGEUPPER) + return dsa_slave_changeupper(dev, ptr); return NOTIFY_DONE; } -int dsa_slave_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static struct notifier_block dsa_slave_nb __read_mostly = { + .notifier_call = dsa_slave_netdevice_event, +}; + +int dsa_slave_register_notifier(void) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); + return register_netdevice_notifier(&dsa_slave_nb); +} - if (dsa_slave_dev_check(dev)) - return dsa_slave_port_event(dev, event, ptr); +void dsa_slave_unregister_notifier(void) +{ + int err; - return NOTIFY_DONE; + err = unregister_netdevice_notifier(&dsa_slave_nb); + if (err) + pr_err("DSA: failed to unregister slave notifier (%d)\n", err); } diff --git a/net/dsa/switch.c b/net/dsa/switch.c new file mode 100644 index 000000000000..6456dacf9ae9 --- /dev/null +++ b/net/dsa/switch.c @@ -0,0 +1,85 @@ +/* + * Handling of a single switch chip, part of a switch fabric + * + * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/netdevice.h> +#include <linux/notifier.h> +#include <net/dsa.h> + +static int dsa_switch_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_bridge_join) + return ds->ops->port_bridge_join(ds, info->port, info->br); + + if (ds->index != info->sw_index) + dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n", + info->sw_index, info->port, netdev_name(info->br)); + + return 0; +} + +static int dsa_switch_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->port, info->br); + + if (ds->index != info->sw_index) + dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n", + info->sw_index, info->port, netdev_name(info->br)); + + return 0; +} + +static int dsa_switch_event(struct notifier_block *nb, + unsigned long event, void *info) +{ + struct dsa_switch *ds = container_of(nb, struct dsa_switch, nb); + int err; + + switch (event) { + case DSA_NOTIFIER_BRIDGE_JOIN: + err = dsa_switch_bridge_join(ds, info); + break; + case DSA_NOTIFIER_BRIDGE_LEAVE: + err = dsa_switch_bridge_leave(ds, info); + break; + default: + err = -EOPNOTSUPP; + break; + } + + /* Non-switchdev operations cannot be rolled back. If a DSA driver + * returns an error during the chained call, switch chips may be in an + * inconsistent state. + */ + if (err) + dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n", + event, err); + + return notifier_from_errno(err); +} + +int dsa_switch_register_notifier(struct dsa_switch *ds) +{ + ds->nb.notifier_call = dsa_switch_event; + + return raw_notifier_chain_register(&ds->dst->nh, &ds->nb); +} + +void dsa_switch_unregister_notifier(struct dsa_switch *ds) +{ + int err; + + err = raw_notifier_chain_unregister(&ds->dst->nh, &ds->nb); + if (err) + dev_err(ds->dev, "failed to unregister notifier (%d)\n", err); +} diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index f5b60388d02f..56080da4aa77 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -12,6 +12,7 @@ #include "hsr_slave.h" #include <linux/etherdevice.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> #include "hsr_main.h" #include "hsr_device.h" #include "hsr_forward.h" @@ -81,7 +82,7 @@ static int hsr_check_dev_ok(struct net_device *dev) return -EINVAL; } - if (dev->priv_flags & IFF_802_1Q_VLAN) { + if (is_vlan_dev(dev)) { netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); return -EINVAL; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b67719f45953..7a719f1ae556 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -222,7 +222,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { - int res = dst_neigh_output(dst, neigh, skb); + int res; + + sock_confirm_neigh(skb, neigh); + res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return res; @@ -886,6 +889,9 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1086,6 +1092,9 @@ alloc_new_skb: exthdrlen = 0; csummode = CHECKSUM_NONE; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue. */ diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 592db6a3a0e9..6ee792d83d5b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -848,7 +848,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4e49e5cb001c..8119e1f66e03 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -383,6 +383,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_from_msg(iph, msg, length)) @@ -666,7 +669,8 @@ out: return len; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b7c231c1aef..cb494a5050f7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -168,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = { .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, + .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + const struct rtable *rt; + + rt = (const struct rtable *)dst; + if (rt->rt_gateway) + pkey = (const __be32 *)&rt->rt_gateway; + else if (!daddr || + (rt->rt_flags & + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + return; + + __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); +} + #define IP_IDENTS_SZ 2048u static atomic_t *ip_idents __read_mostly; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 27c95acbb52f..2c0ff327b6df 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3644,11 +3644,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { - struct dst_entry *dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - } + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) + sk_dst_confirm(sk); if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); @@ -5995,7 +5992,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; case TCP_FIN_WAIT1: { - struct dst_entry *dst; int tmo; /* If we enter the TCP_FIN_WAIT1 state and we are a @@ -6022,9 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); + sk_dst_confirm(sk); if (!sock_flag(sk, SOCK_DEAD)) { /* Wake up lingering close() */ diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b9ed0d50aead..0f46e5fe31ad 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -375,12 +375,10 @@ void tcp_update_metrics(struct sock *sk) u32 val; int m; + sk_dst_confirm(sk); if (sysctl_tcp_nometrics_save || !dst) return; - if (dst->flags & DST_HOST) - dst_confirm(dst); - rcu_read_lock(); if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? @@ -493,11 +491,10 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ + sk_dst_confirm(sk); if (!dst) goto reset; - dst_confirm(dst); - rcu_read_lock(); tm = tcp_get_metrics(sk, dst, true); if (!tm) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccf1ef4dcba4..61f272a99a49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -980,6 +980,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); + skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; th->source = inet->inet_sport; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf6ba3387401..4a1ba04565d1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1101,7 +1101,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b6a94ff0bbd0..d299040613a0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,6 +119,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return ret; @@ -1144,6 +1145,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1516,6 +1520,9 @@ alloc_new_skb: exthdrlen = 0; dst_exthdrlen = 0; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ea89073c8247..f174e76e6505 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -654,6 +654,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); if (err) @@ -934,7 +937,8 @@ out: txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8ffa24cc8899..f54f4265b37f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -223,6 +223,21 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, return neigh_create(&nd_tbl, daddr, dst->dev); } +static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + struct rt6_info *rt = (struct rt6_info *)dst; + + daddr = choose_neigh_daddr(rt, NULL, daddr); + if (!daddr) + return; + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + return; + if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) + return; + __ipv6_confirm_neigh(dev, daddr); +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .gc = ip6_dst_gc, @@ -239,6 +254,7 @@ static struct dst_ops ip6_dst_ops_template = { .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_neigh_lookup, + .confirm_neigh = ip6_confirm_neigh, }; static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) @@ -1365,6 +1381,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct ipv6hdr *iph, u32 mtu) { + const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; if (rt6->rt6i_flags & RTF_LOCAL) @@ -1373,26 +1390,26 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (dst_metric_locked(dst, RTAX_MTU)) return; - dst_confirm(dst); + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + daddr = NULL; + saddr = NULL; + } + dst_confirm_neigh(dst, daddr); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); - } else { - const struct in6_addr *daddr, *saddr; + } else if (daddr) { struct rt6_info *nrt6; - if (iph) { - daddr = &iph->daddr; - saddr = &iph->saddr; - } else if (sk) { - daddr = &sk->sk_v6_daddr; - saddr = &inet6_sk(sk)->saddr; - } else { - return; - } nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -2316,7 +2333,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->dst); + dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 6124e159c882..85582257d3af 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -55,8 +55,8 @@ static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, }; -int nla_put_srh(struct sk_buff *skb, int attrtype, - struct seg6_iptunnel_encap *tuninfo) +static int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) { struct seg6_iptunnel_encap *data; struct nlattr *nla; @@ -235,7 +235,7 @@ static int seg6_do_srh(struct sk_buff *skb) return 0; } -int seg6_input(struct sk_buff *skb) +static int seg6_input(struct sk_buff *skb) { int err; @@ -251,7 +251,7 @@ int seg6_input(struct sk_buff *skb) return dst_input(skb); } -int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b4c6516a3a0c..51346fa70298 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1308,7 +1308,8 @@ out: return err; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 4b06eb415f68..734798a00ca0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -658,7 +658,8 @@ out: return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 84682f02b611..af49c7dca860 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,8 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#include <linux/if_arp.h> - #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e50dc6d7543f..2a6835b4562b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -832,8 +832,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, if (transport->state != SCTP_UNCONFIRMED) transport->state = SCTP_INACTIVE; else { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); ulp_notify = false; } diff --git a/net/sctp/output.c b/net/sctp/output.c index 07ab5062e541..814eac047467 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -546,6 +546,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; + int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -624,7 +625,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - tp->af_specific->sctp_xmit(head, tp); + confirm = tp->dst_pending_confirm; + if (confirm) + skb_set_dst_pending_confirm(head, 1); + /* neighbour should be confirmed on successful transmission or + * positive error + */ + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + tp->dst_pending_confirm = 0; out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 65abe22d8691..db352e5d61f8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1654,7 +1654,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, if (forward_progress) { if (transport->dst) - dst_confirm(transport->dst); + sctp_transport_dst_confirm(transport); } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ad3445b3408e..c7d3249f88ec 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3333,8 +3333,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; case SCTP_PARAM_DEL_IP: @@ -3348,8 +3347,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; default: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a4552712b882..51abcc90fe75 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -755,7 +755,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * forward progress. */ if (t->dst) - dst_confirm(t->dst); + sctp_transport_dst_confirm(t); /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 782e579472c9..d8798ddda726 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3867,6 +3867,9 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + if (!asoc->peer.prsctp_capable) + return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); + /* Make sure that the FORWARD_TSN chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, @@ -3935,6 +3938,9 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + if (!asoc->peer.prsctp_capable) + return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); + /* Make sure that the FORWARD_TSN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fc7122c76de..a4609a0be76d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -592,7 +592,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { /* Clear the source and route cache */ - dst_release(trans->dst); + sctp_transport_dst_release(trans); trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); trans->ssthresh = asoc->peer.i.a_rwnd; @@ -843,7 +843,7 @@ skip_mkasconf: */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index baa1ac00d7b5..5b63ceb3bf37 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -240,7 +240,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ if (!transport->dst || transport->dst->obsolete) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); } @@ -672,3 +672,17 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t) sctp_transport_hold(t); } } + +/* Drop dst */ +void sctp_transport_dst_release(struct sctp_transport *t) +{ + dst_release(t->dst); + t->dst = NULL; + t->dst_pending_confirm = 0; +} + +/* Schedule neighbour confirm */ +void sctp_transport_dst_confirm(struct sctp_transport *t) +{ + t->dst_pending_confirm = 1; +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 99ad1af2927f..0a0f63d3cc96 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2856,6 +2856,23 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, return dst->path->ops->neigh_lookup(dst, skb, daddr); } +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + const struct dst_entry *path = dst->path; + + for (; dst != path; dst = dst->child) { + const struct xfrm_state *xfrm = dst->xfrm; + + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) + continue; + if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = xfrm->coaddr; + else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) + daddr = &xfrm->id.daddr; + } + path->ops->confirm_neigh(path, daddr); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -2882,6 +2899,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; + if (likely(!dst_ops->confirm_neigh)) + dst_ops->confirm_neigh = xfrm_confirm_neigh; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = xfrm_garbage_collect_deferred; rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 0d0912c7f03c..71f6407cde60 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2404,6 +2404,29 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), + BPF_MOV64_IMM(BPF_REG_5, 12), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -4370,6 +4393,61 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, + { + "invalid and of negative number", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "invalid range check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9), + BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + .result_unpriv = REJECT, + } }; static int probe_filter_length(const struct bpf_insn *fp) |