summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/stmmac.txt4
-rw-r--r--Documentation/networking/switchdev.txt14
-rw-r--r--drivers/net/bonding/bond_3ad.c2
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c33
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h5
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c84
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c113
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c135
-rw-r--r--drivers/net/ethernet/rocker/rocker.c73
-rw-r--r--drivers/net/ethernet/rocker/rocker.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c59
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c31
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c136
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h8
-rw-r--r--include/linux/netdevice.h13
-rw-r--r--include/linux/skbuff.h15
-rw-r--r--include/linux/stmmac.h2
-rw-r--r--include/net/cls_cgroup.h29
-rw-r--r--include/net/switchdev.h9
-rw-r--r--include/uapi/linux/bpf.h7
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/bridge/br_multicast.c29
-rw-r--r--net/bridge/br_private.h5
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/filter.c15
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/sched/cls_cgroup.c23
-rw-r--r--net/switchdev/switchdev.c111
-rw-r--r--net/tipc/bcast.c31
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c26
-rw-r--r--net/tipc/bearer.h3
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/discover.c20
-rw-r--r--net/tipc/link.c1517
-rw-r--r--net/tipc/link.h74
-rw-r--r--net/tipc/msg.h53
-rw-r--r--net/tipc/name_distr.c6
-rw-r--r--net/tipc/node.c549
-rw-r--r--net/tipc/node.h93
-rw-r--r--net/tipc/socket.c71
43 files changed, 2052 insertions, 1374 deletions
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index e655e2453c98..5fddefa69baf 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -139,8 +139,6 @@ struct plat_stmmacenet_data {
void (*free)(struct platform_device *pdev, void *priv);
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
- void *custom_cfg;
- void *custom_data;
void *bsp_priv;
};
@@ -186,8 +184,6 @@ Where:
which will be stored in bsp_priv, and then passed to init and
exit callbacks. init/exit callbacks should not use or modify
platform data.
- o custom_cfg/custom_data: this is a custom configuration that can be passed
- while initializing the resources.
o bsp_priv: another private pointer.
For MDIO bus The we have:
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index c5d7ade10ff2..9825f32a8634 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's
current STP state. The switch driver, knowing which ports are within which
vlan L2 domain, can program the switch device for flooding. The packet should
also be sent to the port netdev for processing by the bridge driver. The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver should mark a packet as already
+forwarded using skb->offload_fwd_mark. The same mark is set on the device
+ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
It is possible for the switch device to not handle flooding and push the
packets up to the bridge driver for flooding. This is not ideal as the number
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7fde4d5c2b28..3c45358844eb 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1870,8 +1870,6 @@ static void ad_marker_info_received(struct bond_marker *marker_info,
static void ad_marker_response_received(struct bond_marker *marker,
struct port *port)
{
- marker = NULL;
- port = NULL;
/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 317a49480475..1c6a773c87ea 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3751,7 +3751,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
struct slave *slave;
struct list_head *iter;
struct bond_up_slave *new_arr, *old_arr;
- int slaves_in_agg;
int agg_id = 0;
int ret = 0;
@@ -3782,7 +3781,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
}
goto out;
}
- slaves_in_agg = ad_info.ports;
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 64c1e9db6b0b..5bf7ce0ae221 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -907,9 +907,8 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
}
bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-
if (mode == GENET_POWER_PASSIVE)
- bcmgenet_mii_reset(priv->dev);
+ bcmgenet_phy_power_set(priv->dev, true);
}
/* ioctl handle special commands that are not present in ethtool. */
@@ -1725,7 +1724,7 @@ static int init_umac(struct bcmgenet_priv *priv)
int0_enable |= UMAC_IRQ_TXDMA_DONE;
/* Monitor cable plug/unplugged event for internal PHY */
- if (phy_is_internal(priv->phydev)) {
+ if (priv->internal_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
} else if (priv->ext_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
@@ -2632,7 +2631,7 @@ static int bcmgenet_open(struct net_device *dev)
/* If this is an internal GPHY, power it back on now, before UniMAC is
* brought out of reset as absolutely no UniMAC activity is allowed
*/
- if (phy_is_internal(priv->phydev))
+ if (priv->internal_phy)
bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
/* take MAC out of reset */
@@ -2651,7 +2650,7 @@ static int bcmgenet_open(struct net_device *dev)
bcmgenet_set_hw_addr(priv, dev->dev_addr);
- if (phy_is_internal(priv->phydev)) {
+ if (priv->internal_phy) {
reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
reg |= EXT_ENERGY_DET_MASK;
bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
@@ -2687,18 +2686,20 @@ static int bcmgenet_open(struct net_device *dev)
goto err_irq0;
}
- /* Re-configure the port multiplexer towards the PHY device */
- bcmgenet_mii_config(priv->dev, false);
-
- phy_connect_direct(dev, priv->phydev, bcmgenet_mii_setup,
- priv->phy_interface);
+ ret = bcmgenet_mii_probe(dev);
+ if (ret) {
+ netdev_err(dev, "failed to connect to PHY\n");
+ goto err_irq1;
+ }
bcmgenet_netif_start(dev);
return 0;
+err_irq1:
+ free_irq(priv->irq1, priv);
err_irq0:
- free_irq(priv->irq0, dev);
+ free_irq(priv->irq0, priv);
err_fini_dma:
bcmgenet_fini_dma(priv);
err_clk_disable:
@@ -2757,7 +2758,7 @@ static int bcmgenet_close(struct net_device *dev)
free_irq(priv->irq0, priv);
free_irq(priv->irq1, priv);
- if (phy_is_internal(priv->phydev))
+ if (priv->internal_phy)
ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
if (!IS_ERR(priv->clk))
@@ -3319,7 +3320,7 @@ static int bcmgenet_suspend(struct device *d)
if (device_may_wakeup(d) && priv->wolopts) {
ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
clk_prepare_enable(priv->clk_wol);
- } else if (phy_is_internal(priv->phydev)) {
+ } else if (priv->internal_phy) {
ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
}
@@ -3348,7 +3349,7 @@ static int bcmgenet_resume(struct device *d)
/* If this is an internal GPHY, power it back on now, before UniMAC is
* brought out of reset as absolutely no UniMAC activity is allowed
*/
- if (phy_is_internal(priv->phydev))
+ if (priv->internal_phy)
bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
bcmgenet_umac_reset(priv);
@@ -3363,14 +3364,14 @@ static int bcmgenet_resume(struct device *d)
phy_init_hw(priv->phydev);
/* Speed settings must be restored */
- bcmgenet_mii_config(priv->dev, false);
+ bcmgenet_mii_config(priv->dev);
/* disable ethernet MAC while updating its registers */
umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false);
bcmgenet_set_hw_addr(priv, dev->dev_addr);
- if (phy_is_internal(priv->phydev)) {
+ if (priv->internal_phy) {
reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
reg |= EXT_ENERGY_DET_MASK;
bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 6159deab8c98..7299d1075422 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -593,6 +593,7 @@ struct bcmgenet_priv {
/* MDIO bus variables */
wait_queue_head_t wq;
struct phy_device *phydev;
+ bool internal_phy;
struct device_node *phy_dn;
struct device_node *mdio_dn;
struct mii_bus *mii_bus;
@@ -670,9 +671,9 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF);
/* MDIO routines */
int bcmgenet_mii_init(struct net_device *dev);
-int bcmgenet_mii_config(struct net_device *dev, bool init);
+int bcmgenet_mii_config(struct net_device *dev);
+int bcmgenet_mii_probe(struct net_device *dev);
void bcmgenet_mii_exit(struct net_device *dev);
-void bcmgenet_mii_reset(struct net_device *dev);
void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
void bcmgenet_mii_setup(struct net_device *dev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index adf23d2ac488..0802cd9d2424 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -163,16 +163,6 @@ void bcmgenet_mii_setup(struct net_device *dev)
phy_print_status(phydev);
}
-void bcmgenet_mii_reset(struct net_device *dev)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
-
- if (priv->phydev) {
- phy_init_hw(priv->phydev);
- phy_start_aneg(priv->phydev);
- }
-}
-
void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -215,7 +205,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev)
reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
reg |= EXT_PWR_DN_EN_LD;
bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
- bcmgenet_mii_reset(dev);
}
static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
@@ -228,7 +217,7 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
}
-int bcmgenet_mii_config(struct net_device *dev, bool init)
+int bcmgenet_mii_config(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct phy_device *phydev = priv->phydev;
@@ -238,10 +227,10 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
u32 port_ctrl;
u32 reg;
- priv->ext_phy = !phy_is_internal(priv->phydev) &&
+ priv->ext_phy = !priv->internal_phy &&
(priv->phy_interface != PHY_INTERFACE_MODE_MOCA);
- if (phy_is_internal(priv->phydev))
+ if (priv->internal_phy)
priv->phy_interface = PHY_INTERFACE_MODE_NA;
switch (priv->phy_interface) {
@@ -259,7 +248,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
bcmgenet_sys_writel(priv, port_ctrl, SYS_PORT_CTRL);
- if (phy_is_internal(priv->phydev)) {
+ if (priv->internal_phy) {
phy_name = "internal PHY";
bcmgenet_internal_phy_setup(dev);
} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
@@ -321,13 +310,12 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
}
- if (init)
- dev_info(kdev, "configuring instance for %s\n", phy_name);
+ dev_info_once(kdev, "configuring instance for %s\n", phy_name);
return 0;
}
-static int bcmgenet_mii_probe(struct net_device *dev)
+int bcmgenet_mii_probe(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct device_node *dn = priv->pdev->dev.of_node;
@@ -345,22 +333,6 @@ static int bcmgenet_mii_probe(struct net_device *dev)
priv->old_pause = -1;
if (dn) {
- if (priv->phydev) {
- pr_info("PHY already attached\n");
- return 0;
- }
-
- /* In the case of a fixed PHY, the DT node associated
- * to the PHY is the Ethernet MAC DT node.
- */
- if (!priv->phy_dn && of_phy_is_fixed_link(dn)) {
- ret = of_phy_register_fixed_link(dn);
- if (ret)
- return ret;
-
- priv->phy_dn = of_node_get(dn);
- }
-
phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
phy_flags, priv->phy_interface);
if (!phydev) {
@@ -386,7 +358,7 @@ static int bcmgenet_mii_probe(struct net_device *dev)
* PHY speed which is needed for bcmgenet_mii_config() to configure
* things appropriately.
*/
- ret = bcmgenet_mii_config(dev, true);
+ ret = bcmgenet_mii_config(dev);
if (ret) {
phy_disconnect(priv->phydev);
return ret;
@@ -397,14 +369,11 @@ static int bcmgenet_mii_probe(struct net_device *dev)
/* The internal PHY has its link interrupts routed to the
* Ethernet MAC ISRs
*/
- if (phy_is_internal(priv->phydev))
+ if (priv->internal_phy)
priv->mii_bus->irq[phydev->addr] = PHY_IGNORE_INTERRUPT;
else
priv->mii_bus->irq[phydev->addr] = PHY_POLL;
- pr_info("attached PHY at address %d [%s]\n",
- phydev->addr, phydev->drv->name);
-
return 0;
}
@@ -490,7 +459,9 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
{
struct device_node *dn = priv->pdev->dev.of_node;
struct device *kdev = &priv->pdev->dev;
+ const char *phy_mode_str = NULL;
char *compat;
+ int phy_mode;
int ret;
compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version);
@@ -513,8 +484,36 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
/* Fetch the PHY phandle */
priv->phy_dn = of_parse_phandle(dn, "phy-handle", 0);
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (!priv->phy_dn && of_phy_is_fixed_link(dn)) {
+ ret = of_phy_register_fixed_link(dn);
+ if (ret)
+ return ret;
+
+ priv->phy_dn = of_node_get(dn);
+ }
+
/* Get the link mode */
- priv->phy_interface = of_get_phy_mode(dn);
+ phy_mode = of_get_phy_mode(dn);
+ priv->phy_interface = phy_mode;
+
+ /* We need to specifically look up whether this PHY interface is internal
+ * or not *before* we even try to probe the PHY driver over MDIO as we
+ * may have shut down the internal PHY for power saving purposes.
+ */
+ if (phy_mode < 0) {
+ ret = of_property_read_string(dn, "phy-mode", &phy_mode_str);
+ if (ret < 0) {
+ dev_err(kdev, "invalid PHY mode property\n");
+ return ret;
+ }
+
+ priv->phy_interface = PHY_INTERFACE_MODE_NA;
+ if (!strcasecmp(phy_mode_str, "internal"))
+ priv->internal_phy = true;
+ }
return 0;
}
@@ -615,10 +614,6 @@ int bcmgenet_mii_init(struct net_device *dev)
ret = bcmgenet_mii_bus_init(priv);
if (ret)
- goto out_free;
-
- ret = bcmgenet_mii_probe(dev);
- if (ret)
goto out;
return 0;
@@ -626,7 +621,6 @@ int bcmgenet_mii_init(struct net_device *dev)
out:
of_node_put(priv->phy_dn);
mdiobus_unregister(priv->mii_bus);
-out_free:
kfree(priv->mii_bus->irq);
mdiobus_free(priv->mii_bus);
return ret;
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index f3f1601a76f3..f44a39c40642 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -224,7 +224,8 @@ static int enic_get_coalesce(struct net_device *netdev,
struct enic *enic = netdev_priv(netdev);
struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
- ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
+ if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
+ ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
if (rxcoal->use_adaptive_rx_coalesce)
ecmd->use_adaptive_rx_coalesce = 1;
@@ -234,6 +235,53 @@ static int enic_get_coalesce(struct net_device *netdev,
return 0;
}
+static int enic_coalesce_valid(struct enic *enic,
+ struct ethtool_coalesce *ec)
+{
+ u32 coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
+ u32 rx_coalesce_usecs_high = min_t(u32, coalesce_usecs_max,
+ ec->rx_coalesce_usecs_high);
+ u32 rx_coalesce_usecs_low = min_t(u32, coalesce_usecs_max,
+ ec->rx_coalesce_usecs_low);
+
+ if (ec->rx_max_coalesced_frames ||
+ ec->rx_coalesce_usecs_irq ||
+ ec->rx_max_coalesced_frames_irq ||
+ ec->tx_max_coalesced_frames ||
+ ec->tx_coalesce_usecs_irq ||
+ ec->tx_max_coalesced_frames_irq ||
+ ec->stats_block_coalesce_usecs ||
+ ec->use_adaptive_tx_coalesce ||
+ ec->pkt_rate_low ||
+ ec->rx_max_coalesced_frames_low ||
+ ec->tx_coalesce_usecs_low ||
+ ec->tx_max_coalesced_frames_low ||
+ ec->pkt_rate_high ||
+ ec->rx_max_coalesced_frames_high ||
+ ec->tx_coalesce_usecs_high ||
+ ec->tx_max_coalesced_frames_high ||
+ ec->rate_sample_interval)
+ return -EINVAL;
+
+ if ((vnic_dev_get_intr_mode(enic->vdev) != VNIC_DEV_INTR_MODE_MSIX) &&
+ ec->tx_coalesce_usecs)
+ return -EINVAL;
+
+ if ((ec->tx_coalesce_usecs > coalesce_usecs_max) ||
+ (ec->rx_coalesce_usecs > coalesce_usecs_max) ||
+ (ec->rx_coalesce_usecs_low > coalesce_usecs_max) ||
+ (ec->rx_coalesce_usecs_high > coalesce_usecs_max))
+ netdev_info(enic->netdev, "ethtool_set_coalesce: adaptor supports max coalesce value of %d. Setting max value.\n",
+ coalesce_usecs_max);
+
+ if (ec->rx_coalesce_usecs_high &&
+ (rx_coalesce_usecs_high <
+ rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
+ return -EINVAL;
+
+ return 0;
+}
+
static int enic_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ecmd)
{
@@ -244,8 +292,12 @@ static int enic_set_coalesce(struct net_device *netdev,
u32 rx_coalesce_usecs_high;
u32 coalesce_usecs_max;
unsigned int i, intr;
+ int ret;
struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
+ ret = enic_coalesce_valid(enic, ecmd);
+ if (ret)
+ return ret;
coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
coalesce_usecs_max);
@@ -257,59 +309,24 @@ static int enic_set_coalesce(struct net_device *netdev,
rx_coalesce_usecs_high = min_t(u32, ecmd->rx_coalesce_usecs_high,
coalesce_usecs_max);
- switch (vnic_dev_get_intr_mode(enic->vdev)) {
- case VNIC_DEV_INTR_MODE_INTX:
- if (tx_coalesce_usecs != rx_coalesce_usecs)
- return -EINVAL;
- if (ecmd->use_adaptive_rx_coalesce ||
- ecmd->rx_coalesce_usecs_low ||
- ecmd->rx_coalesce_usecs_high)
- return -EINVAL;
-
- intr = enic_legacy_io_intr();
- vnic_intr_coalescing_timer_set(&enic->intr[intr],
- tx_coalesce_usecs);
- break;
- case VNIC_DEV_INTR_MODE_MSI:
- if (tx_coalesce_usecs != rx_coalesce_usecs)
- return -EINVAL;
- if (ecmd->use_adaptive_rx_coalesce ||
- ecmd->rx_coalesce_usecs_low ||
- ecmd->rx_coalesce_usecs_high)
- return -EINVAL;
-
- vnic_intr_coalescing_timer_set(&enic->intr[0],
- tx_coalesce_usecs);
- break;
- case VNIC_DEV_INTR_MODE_MSIX:
- if (ecmd->rx_coalesce_usecs_high &&
- (rx_coalesce_usecs_high <
- rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
- return -EINVAL;
-
+ if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
for (i = 0; i < enic->wq_count; i++) {
intr = enic_msix_wq_intr(enic, i);
vnic_intr_coalescing_timer_set(&enic->intr[intr],
- tx_coalesce_usecs);
- }
-
- rxcoal->use_adaptive_rx_coalesce =
- !!ecmd->use_adaptive_rx_coalesce;
- if (!rxcoal->use_adaptive_rx_coalesce)
- enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
-
- if (ecmd->rx_coalesce_usecs_high) {
- rxcoal->range_end = rx_coalesce_usecs_high;
- rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
- rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
- ENIC_AIC_LARGE_PKT_DIFF;
+ tx_coalesce_usecs);
}
- break;
- default:
- break;
+ enic->tx_coalesce_usecs = tx_coalesce_usecs;
+ }
+ rxcoal->use_adaptive_rx_coalesce = !!ecmd->use_adaptive_rx_coalesce;
+ if (!rxcoal->use_adaptive_rx_coalesce)
+ enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
+ if (ecmd->rx_coalesce_usecs_high) {
+ rxcoal->range_end = rx_coalesce_usecs_high;
+ rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
+ rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
+ ENIC_AIC_LARGE_PKT_DIFF;
}
- enic->tx_coalesce_usecs = tx_coalesce_usecs;
enic->rx_coalesce_usecs = rx_coalesce_usecs;
return 0;
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 918a8e42139b..8f646e4e968b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1149,6 +1149,64 @@ static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
return 0;
}
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+ unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ u32 timer = cq->tobe_rx_coal_timeval;
+
+ if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
+ vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+ cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+ }
+}
+
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+ struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+ int index;
+ u32 timer;
+ u32 range_start;
+ u32 traffic;
+ u64 delta;
+ ktime_t now = ktime_get();
+
+ delta = ktime_us_delta(now, cq->prev_ts);
+ if (delta < ENIC_AIC_TS_BREAK)
+ return;
+ cq->prev_ts = now;
+
+ traffic = pkt_size_counter->large_pkt_bytes_cnt +
+ pkt_size_counter->small_pkt_bytes_cnt;
+ /* The table takes Mbps
+ * traffic *= 8 => bits
+ * traffic *= (10^6 / delta) => bps
+ * traffic /= 10^6 => Mbps
+ *
+ * Combining, traffic *= (8 / delta)
+ */
+
+ traffic <<= 3;
+ traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
+
+ for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+ if (traffic < mod_table[index].rx_rate)
+ break;
+ range_start = (pkt_size_counter->small_pkt_bytes_cnt >
+ pkt_size_counter->large_pkt_bytes_cnt << 1) ?
+ rx_coal->small_pkt_range_start :
+ rx_coal->large_pkt_range_start;
+ timer = range_start + ((rx_coal->range_end - range_start) *
+ mod_table[index].range_percent / 100);
+ /* Damping */
+ cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
+
+ pkt_size_counter->large_pkt_bytes_cnt = 0;
+ pkt_size_counter->small_pkt_bytes_cnt = 0;
+}
+
static int enic_poll(struct napi_struct *napi, int budget)
{
struct net_device *netdev = napi->dev;
@@ -1199,6 +1257,11 @@ static int enic_poll(struct napi_struct *napi, int budget)
if (err)
rq_work_done = rq_work_to_do;
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ /* Call the function which refreshes the intr coalescing timer
+ * value based on the traffic.
+ */
+ enic_calc_int_moderation(enic, &enic->rq[0]);
if (rq_work_done < rq_work_to_do) {
@@ -1207,70 +1270,14 @@ static int enic_poll(struct napi_struct *napi, int budget)
*/
napi_complete(napi);
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ enic_set_int_moderation(enic, &enic->rq[0]);
vnic_intr_unmask(&enic->intr[intr]);
}
return rq_work_done;
}
-static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
-{
- unsigned int intr = enic_msix_rq_intr(enic, rq->index);
- struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
- u32 timer = cq->tobe_rx_coal_timeval;
-
- if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
- vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
- cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
- }
-}
-
-static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
-{
- struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
- struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
- struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
- int index;
- u32 timer;
- u32 range_start;
- u32 traffic;
- u64 delta;
- ktime_t now = ktime_get();
-
- delta = ktime_us_delta(now, cq->prev_ts);
- if (delta < ENIC_AIC_TS_BREAK)
- return;
- cq->prev_ts = now;
-
- traffic = pkt_size_counter->large_pkt_bytes_cnt +
- pkt_size_counter->small_pkt_bytes_cnt;
- /* The table takes Mbps
- * traffic *= 8 => bits
- * traffic *= (10^6 / delta) => bps
- * traffic /= 10^6 => Mbps
- *
- * Combining, traffic *= (8 / delta)
- */
-
- traffic <<= 3;
- traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
-
- for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
- if (traffic < mod_table[index].rx_rate)
- break;
- range_start = (pkt_size_counter->small_pkt_bytes_cnt >
- pkt_size_counter->large_pkt_bytes_cnt << 1) ?
- rx_coal->small_pkt_range_start :
- rx_coal->large_pkt_range_start;
- timer = range_start + ((rx_coal->range_end - range_start) *
- mod_table[index].range_percent / 100);
- /* Damping */
- cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
-
- pkt_size_counter->large_pkt_bytes_cnt = 0;
- pkt_size_counter->small_pkt_bytes_cnt = 0;
-}
-
#ifdef CONFIG_RFS_ACCEL
static void enic_free_rx_cpu_rmap(struct enic *enic)
{
@@ -1407,10 +1414,8 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
if (err)
work_done = work_to_do;
if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
- /* Call the function which refreshes
- * the intr coalescing timer value based on
- * the traffic. This is supported only in
- * the case of MSI-x mode
+ /* Call the function which refreshes the intr coalescing timer
+ * value based on the traffic.
*/
enic_calc_int_moderation(enic, &enic->rq[rq]);
@@ -1569,12 +1574,6 @@ static void enic_set_rx_coal_setting(struct enic *enic)
int index = -1;
struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
- /* If intr mode is not MSIX, do not do adaptive coalescing */
- if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
- netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
- return;
- }
-
/* 1. Read the link speed from fw
* 2. Pick the default range for the speed
* 3. Update it in enic->rx_coalesce_setting
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 932428314b80..7b4c3474acfe 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -202,6 +202,7 @@ enum {
ROCKER_CTRL_IPV4_MCAST,
ROCKER_CTRL_IPV6_MCAST,
ROCKER_CTRL_DFLT_BRIDGING,
+ ROCKER_CTRL_DFLT_OVS,
ROCKER_CTRL_MAX,
};
@@ -321,9 +322,21 @@ static u16 rocker_port_vlan_to_vid(const struct rocker_port *rocker_port,
return ntohs(vlan_id);
}
+static bool rocker_port_is_slave(const struct rocker_port *rocker_port,
+ const char *kind)
+{
+ return rocker_port->bridge_dev &&
+ !strcmp(rocker_port->bridge_dev->rtnl_link_ops->kind, kind);
+}
+
static bool rocker_port_is_bridged(const struct rocker_port *rocker_port)
{
- return !!rocker_port->bridge_dev;
+ return rocker_port_is_slave(rocker_port, "bridge");
+}
+
+static bool rocker_port_is_ovsed(const struct rocker_port *rocker_port)
+{
+ return rocker_port_is_slave(rocker_port, "openvswitch");
}
#define ROCKER_OP_FLAG_REMOVE BIT(0)
@@ -3275,6 +3288,12 @@ static struct rocker_ctrl {
.bridge = true,
.copy_to_cpu = true,
},
+ [ROCKER_CTRL_DFLT_OVS] = {
+ /* pass all pkts up to CPU */
+ .eth_dst = zero_mac,
+ .eth_dst_mask = zero_mac,
+ .acl = true,
+ },
};
static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port,
@@ -3787,11 +3806,14 @@ static int rocker_port_stp_update(struct rocker_port *rocker_port,
break;
case BR_STATE_LEARNING:
case BR_STATE_FORWARDING:
- want[ROCKER_CTRL_LINK_LOCAL_MCAST] = true;
+ if (!rocker_port_is_ovsed(rocker_port))
+ want[ROCKER_CTRL_LINK_LOCAL_MCAST] = true;
want[ROCKER_CTRL_IPV4_MCAST] = true;
want[ROCKER_CTRL_IPV6_MCAST] = true;
if (rocker_port_is_bridged(rocker_port))
want[ROCKER_CTRL_DFLT_BRIDGING] = true;
+ else if (rocker_port_is_ovsed(rocker_port))
+ want[ROCKER_CTRL_DFLT_OVS] = true;
else
want[ROCKER_CTRL_LOCAL_ARP] = true;
break;
@@ -4800,6 +4822,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+ u16 rx_flags = 0;
if (!skb)
return -ENOENT;
@@ -4807,6 +4830,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+ if (attrs[ROCKER_TLV_RX_FLAGS])
+ rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
@@ -4814,6 +4839,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
skb_put(skb, rx_len);
skb->protocol = eth_type_trans(skb, rocker_port->dev);
+ if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
+ skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+
rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
@@ -4951,6 +4979,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
+ switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
@@ -5230,6 +5260,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
rocker_port->bridge_dev = bridge;
+ switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5250,6 +5281,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
rocker_port->dev->ifindex);
+ switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
+ false);
rocker_port->bridge_dev = NULL;
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
@@ -5264,23 +5297,39 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port)
return err;
}
+
+static int rocker_port_ovs_changed(struct rocker_port *rocker_port,
+ struct net_device *master)
+{
+ int err;
+
+ rocker_port->bridge_dev = master;
+
+ err = rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE, 0);
+ if (err)
+ return err;
+ err = rocker_port_fwd_enable(rocker_port, SWITCHDEV_TRANS_NONE, 0);
+
+ return err;
+}
+
static int rocker_port_master_changed(struct net_device *dev)
{
struct rocker_port *rocker_port = netdev_priv(dev);
struct net_device *master = netdev_master_upper_dev_get(dev);
int err = 0;
- /* There are currently three cases handled here:
- * 1. Joining a bridge
- * 2. Leaving a previously joined bridge
- * 3. Other, e.g. being added to or removed from a bond or openvswitch,
- * in which case nothing is done
- */
- if (master && master->rtnl_link_ops &&
- !strcmp(master->rtnl_link_ops->kind, "bridge"))
- err = rocker_port_bridge_join(rocker_port, master);
- else if (rocker_port_is_bridged(rocker_port))
+ /* N.B: Do nothing if the type of master is not supported */
+ if (master && master->rtnl_link_ops) {
+ if (!strcmp(master->rtnl_link_ops->kind, "bridge"))
+ err = rocker_port_bridge_join(rocker_port, master);
+ else if (!strcmp(master->rtnl_link_ops->kind, "openvswitch"))
+ err = rocker_port_ovs_changed(rocker_port, master);
+ } else if (rocker_port_is_bridged(rocker_port)) {
err = rocker_port_bridge_leave(rocker_port);
+ } else if (rocker_port_is_ovsed(rocker_port)) {
+ err = rocker_port_ovs_changed(rocker_port, NULL);
+ }
return err;
}
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 08b2c3d96188..12490b2f6504 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -246,6 +246,7 @@ enum {
#define ROCKER_RX_FLAGS_TCP BIT(5)
#define ROCKER_RX_FLAGS_UDP BIT(6)
#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOAD BIT(8)
enum {
ROCKER_TLV_TX_UNSPEC,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index cb888d3ebbdc..78e9d1861896 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -25,66 +25,53 @@
# define LPC18XX_CREG_CREG6_ETHMODE_MII 0x0
# define LPC18XX_CREG_CREG6_ETHMODE_RMII 0x4
-struct lpc18xx_dwmac_priv_data {
+static int lpc18xx_dwmac_probe(struct platform_device *pdev)
+{
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
struct regmap *reg;
- int interface;
-};
+ u8 ethmode;
+ int ret;
-static void *lpc18xx_dwmac_setup(struct platform_device *pdev)
-{
- struct lpc18xx_dwmac_priv_data *dwmac;
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
- dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac)
- return ERR_PTR(-ENOMEM);
+ plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+ if (IS_ERR(plat_dat))
+ return PTR_ERR(plat_dat);
- dwmac->interface = of_get_phy_mode(pdev->dev.of_node);
- if (dwmac->interface < 0)
- return ERR_PTR(dwmac->interface);
+ plat_dat->has_gmac = true;
- dwmac->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
- if (IS_ERR(dwmac->reg)) {
- dev_err(&pdev->dev, "Syscon lookup failed\n");
- return dwmac->reg;
+ reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
+ if (IS_ERR(reg)) {
+ dev_err(&pdev->dev, "syscon lookup failed\n");
+ return PTR_ERR(reg);
}
- return dwmac;
-}
-
-static int lpc18xx_dwmac_init(struct platform_device *pdev, void *priv)
-{
- struct lpc18xx_dwmac_priv_data *dwmac = priv;
- u8 ethmode;
-
- if (dwmac->interface == PHY_INTERFACE_MODE_MII) {
+ if (plat_dat->interface == PHY_INTERFACE_MODE_MII) {
ethmode = LPC18XX_CREG_CREG6_ETHMODE_MII;
- } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) {
+ } else if (plat_dat->interface == PHY_INTERFACE_MODE_RMII) {
ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII;
} else {
dev_err(&pdev->dev, "Only MII and RMII mode supported\n");
return -EINVAL;
}
- regmap_update_bits(dwmac->reg, LPC18XX_CREG_CREG6,
+ regmap_update_bits(reg, LPC18XX_CREG_CREG6,
LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode);
- return 0;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
-static const struct stmmac_of_data lpc18xx_dwmac_data = {
- .has_gmac = 1,
- .setup = lpc18xx_dwmac_setup,
- .init = lpc18xx_dwmac_init,
-};
-
static const struct of_device_id lpc18xx_dwmac_match[] = {
- { .compatible = "nxp,lpc1850-dwmac", .data = &lpc18xx_dwmac_data },
+ { .compatible = "nxp,lpc1850-dwmac" },
{ }
};
MODULE_DEVICE_TABLE(of, lpc18xx_dwmac_match);
static struct platform_driver lpc18xx_dwmac_driver = {
- .probe = stmmac_pltfr_probe,
+ .probe = lpc18xx_dwmac_probe,
.remove = stmmac_pltfr_remove,
.driver = {
.name = "lpc18xx-dwmac",
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 61a324a87d09..c1bac1912b37 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -47,36 +47,45 @@ static void meson6_dwmac_fix_mac_speed(void *priv, unsigned int speed)
writel(val, dwmac->reg);
}
-static void *meson6_dwmac_setup(struct platform_device *pdev)
+static int meson6_dwmac_probe(struct platform_device *pdev)
{
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
struct meson_dwmac *dwmac;
struct resource *res;
+ int ret;
+
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
+
+ plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+ if (IS_ERR(plat_dat))
+ return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
if (!dwmac)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
dwmac->reg = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(dwmac->reg))
- return ERR_CAST(dwmac->reg);
+ return PTR_ERR(dwmac->reg);
- return dwmac;
-}
+ plat_dat->bsp_priv = dwmac;
+ plat_dat->fix_mac_speed = meson6_dwmac_fix_mac_speed;
-static const struct stmmac_of_data meson6_dwmac_data = {
- .setup = meson6_dwmac_setup,
- .fix_mac_speed = meson6_dwmac_fix_mac_speed,
-};
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+}
static const struct of_device_id meson6_dwmac_match[] = {
- { .compatible = "amlogic,meson6-dwmac", .data = &meson6_dwmac_data},
+ { .compatible = "amlogic,meson6-dwmac" },
{ }
};
MODULE_DEVICE_TABLE(of, meson6_dwmac_match);
static struct platform_driver meson6_dwmac_driver = {
- .probe = stmmac_pltfr_probe,
+ .probe = meson6_dwmac_probe,
.remove = stmmac_pltfr_remove,
.driver = {
.name = "meson6-dwmac",
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index f3918c7e7eeb..eca0eb845241 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -104,18 +104,20 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries)
* this function is to read the driver parameters from device-tree and
* set some private fields that will be used by the main at runtime.
*/
-static int stmmac_probe_config_dt(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat,
- const char **mac)
+struct plat_stmmacenet_data *
+stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
{
struct device_node *np = pdev->dev.of_node;
+ struct plat_stmmacenet_data *plat;
+ const struct stmmac_of_data *data;
struct stmmac_dma_cfg *dma_cfg;
- const struct of_device_id *device;
- struct device *dev = &pdev->dev;
- device = of_match_device(dev->driver->of_match_table, dev);
- if (device->data) {
- const struct stmmac_of_data *data = device->data;
+ plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
+ if (!plat)
+ return ERR_PTR(-ENOMEM);
+
+ data = of_device_get_match_data(&pdev->dev);
+ if (data) {
plat->has_gmac = data->has_gmac;
plat->enh_desc = data->enh_desc;
plat->tx_coe = data->tx_coe;
@@ -151,7 +153,7 @@ static int stmmac_probe_config_dt(struct platform_device *pdev,
/* If phy-handle is not specified, check if we have a fixed-phy */
if (!plat->phy_node && of_phy_is_fixed_link(np)) {
if ((of_phy_register_fixed_link(np) < 0))
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
plat->phy_node = of_node_get(np);
}
@@ -182,6 +184,12 @@ static int stmmac_probe_config_dt(struct platform_device *pdev,
*/
plat->maxmtu = JUMBO_LEN;
+ /* Set default value for multicast hash bins */
+ plat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+ /* Set default value for unicast filter entries */
+ plat->unicast_filter_entries = 1;
+
/*
* Currently only the properties needed on SPEAr600
* are provided. All other properties should be added
@@ -222,7 +230,7 @@ static int stmmac_probe_config_dt(struct platform_device *pdev,
GFP_KERNEL);
if (!dma_cfg) {
of_node_put(np);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
plat->dma_cfg = dma_cfg;
of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
@@ -240,44 +248,34 @@ static int stmmac_probe_config_dt(struct platform_device *pdev,
pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set.");
}
- return 0;
+ return plat;
}
#else
-static int stmmac_probe_config_dt(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat,
- const char **mac)
+struct plat_stmmacenet_data *
+stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
{
- return -ENOSYS;
+ return ERR_PTR(-ENOSYS);
}
#endif /* CONFIG_OF */
+EXPORT_SYMBOL_GPL(stmmac_probe_config_dt);
-/**
- * stmmac_pltfr_probe - platform driver probe.
- * @pdev: platform device pointer
- * Description: platform_device probe function. It is to allocate
- * the necessary platform resources, invoke custom helper (if required) and
- * invoke the main probe function.
- */
-int stmmac_pltfr_probe(struct platform_device *pdev)
+int stmmac_get_platform_resources(struct platform_device *pdev,
+ struct stmmac_resources *stmmac_res)
{
- struct stmmac_resources stmmac_res;
- int ret = 0;
struct resource *res;
- struct device *dev = &pdev->dev;
- struct plat_stmmacenet_data *plat_dat = NULL;
- memset(&stmmac_res, 0, sizeof(stmmac_res));
+ memset(stmmac_res, 0, sizeof(*stmmac_res));
/* Get IRQ information early to have an ability to ask for deferred
* probe if needed before we went too far with resource allocation.
*/
- stmmac_res.irq = platform_get_irq_byname(pdev, "macirq");
- if (stmmac_res.irq < 0) {
- if (stmmac_res.irq != -EPROBE_DEFER) {
- dev_err(dev,
+ stmmac_res->irq = platform_get_irq_byname(pdev, "macirq");
+ if (stmmac_res->irq < 0) {
+ if (stmmac_res->irq != -EPROBE_DEFER) {
+ dev_err(&pdev->dev,
"MAC IRQ configuration information not found\n");
}
- return stmmac_res.irq;
+ return stmmac_res->irq;
}
/* On some platforms e.g. SPEAr the wake up irq differs from the mac irq
@@ -287,45 +285,61 @@ int stmmac_pltfr_probe(struct platform_device *pdev)
* In case the wake up interrupt is not passed from the platform
* so the driver will continue to use the mac irq (ndev->irq)
*/
- stmmac_res.wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
- if (stmmac_res.wol_irq < 0) {
- if (stmmac_res.wol_irq == -EPROBE_DEFER)
+ stmmac_res->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
+ if (stmmac_res->wol_irq < 0) {
+ if (stmmac_res->wol_irq == -EPROBE_DEFER)
return -EPROBE_DEFER;
- stmmac_res.wol_irq = stmmac_res.irq;
+ stmmac_res->wol_irq = stmmac_res->irq;
}
- stmmac_res.lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
- if (stmmac_res.lpi_irq == -EPROBE_DEFER)
+ stmmac_res->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
+ if (stmmac_res->lpi_irq == -EPROBE_DEFER)
return -EPROBE_DEFER;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- stmmac_res.addr = devm_ioremap_resource(dev, res);
- if (IS_ERR(stmmac_res.addr))
- return PTR_ERR(stmmac_res.addr);
-
- plat_dat = dev_get_platdata(&pdev->dev);
-
- if (!plat_dat)
- plat_dat = devm_kzalloc(&pdev->dev,
- sizeof(struct plat_stmmacenet_data),
- GFP_KERNEL);
- if (!plat_dat) {
- pr_err("%s: ERROR: no memory", __func__);
- return -ENOMEM;
- }
+ stmmac_res->addr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(stmmac_res->addr))
+ return PTR_ERR(stmmac_res->addr);
- /* Set default value for multicast hash bins */
- plat_dat->multicast_filter_bins = HASH_TABLE_SIZE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(stmmac_get_platform_resources);
- /* Set default value for unicast filter entries */
- plat_dat->unicast_filter_entries = 1;
+/**
+ * stmmac_pltfr_probe - platform driver probe.
+ * @pdev: platform device pointer
+ * Description: platform_device probe function. It is to allocate
+ * the necessary platform resources, invoke custom helper (if required) and
+ * invoke the main probe function.
+ */
+int stmmac_pltfr_probe(struct platform_device *pdev)
+{
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
+ int ret;
+
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
if (pdev->dev.of_node) {
- ret = stmmac_probe_config_dt(pdev, plat_dat, &stmmac_res.mac);
- if (ret) {
- pr_err("%s: main dt probe failed", __func__);
- return ret;
+ plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+ if (IS_ERR(plat_dat)) {
+ dev_err(&pdev->dev, "dt configuration failed\n");
+ return PTR_ERR(plat_dat);
+ }
+ } else {
+ plat_dat = dev_get_platdata(&pdev->dev);
+ if (!plat_dat) {
+ dev_err(&pdev->dev, "no platform data provided\n");
+ return -EINVAL;
}
+
+ /* Set default value for multicast hash bins */
+ plat_dat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+ /* Set default value for unicast filter entries */
+ plat_dat->unicast_filter_entries = 1;
}
/* Custom setup (if needed) */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index 71da86d7bd00..84ceb5342686 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -19,6 +19,14 @@
#ifndef __STMMAC_PLATFORM_H__
#define __STMMAC_PLATFORM_H__
+#include "stmmac.h"
+
+struct plat_stmmacenet_data *
+stmmac_probe_config_dt(struct platform_device *pdev, const char **mac);
+
+int stmmac_get_platform_resources(struct platform_device *pdev,
+ struct stmmac_resources *stmmac_res);
+
int stmmac_pltfr_probe(struct platform_device *pdev);
int stmmac_pltfr_remove(struct platform_device *pdev);
extern const struct dev_pm_ops stmmac_pltfr_pm_ops;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45cfd797eb77..607b5f41f46f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
};
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+ struct netdev_phys_item_id *b)
+{
+ return a->id_len == b->id_len &&
+ memcmp(a->id, b->id, a->id_len) == 0;
+}
+
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb);
@@ -1456,6 +1463,8 @@ enum netdev_priv_flags {
*
* @xps_maps: XXX: need comments on this one
*
+ * @offload_fwd_mark: Offload device fwding mark
+ *
* @trans_start: Time (in jiffies) of last Tx
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog ( see dev_watchdog() )
@@ -1697,6 +1706,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
#endif
+#ifdef CONFIG_NET_SWITCHDEV
+ u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e87d53..6bd96fe9416a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
* @mark: Generic packet mark
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
@@ -650,9 +651,15 @@ struct sk_buff {
unsigned int sender_cpu;
};
#endif
+ union {
#ifdef CONFIG_NETWORK_SECMARK
- __u32 secmark;
+ __u32 secmark;
+#endif
+#ifdef CONFIG_NET_SWITCHDEV
+ __u32 offload_fwd_mark;
#endif
+ };
+
union {
__u32 mark;
__u32 reserved_tailroom;
@@ -2671,12 +2678,6 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
skb_shinfo(skb)->frag_list = NULL;
}
-static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
-{
- frag->next = skb_shinfo(skb)->frag_list;
- skb_shinfo(skb)->frag_list = frag;
-}
-
#define skb_walk_frags(skb, iter) \
for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c735f5c91eea..c86a20047cb1 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -123,8 +123,6 @@ struct plat_stmmacenet_data {
void (*free)(struct platform_device *pdev, void *priv);
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
- void *custom_cfg;
- void *custom_data;
void *bsp_priv;
};
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index c15d39456e14..ccd6d8bffa4d 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -49,9 +49,38 @@ static inline void sock_update_classid(struct sock *sk)
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
+
+static inline u32 task_get_classid(const struct sk_buff *skb)
+{
+ u32 classid = task_cls_state(current)->classid;
+
+ /* Due to the nature of the classifier it is required to ignore all
+ * packets originating from softirq context as accessing `current'
+ * would lead to false results.
+ *
+ * This test assumes that all callers of dev_queue_xmit() explicitly
+ * disable bh. Knowing this, it is possible to detect softirq based
+ * calls by looking at the number of nested bh disable calls because
+ * softirqs always disables bh.
+ */
+ if (in_serving_softirq()) {
+ /* If there is an sk_classid we'll use that. */
+ if (!skb->sk)
+ return 0;
+
+ classid = skb->sk->sk_classid;
+ }
+
+ return classid;
+}
#else /* !CONFIG_CGROUP_NET_CLASSID */
static inline void sock_update_classid(struct sock *sk)
{
}
+
+static inline u32 task_get_classid(const struct sk_buff *skb)
+{
+ return 0;
+}
#endif /* CONFIG_CGROUP_NET_CLASSID */
#endif /* _NET_CLS_CGROUP_H */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f118bfc..89da8934519b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+ struct net_device *group_dev,
+ bool joining);
#else
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
return -EOPNOTSUPP;
}
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+ struct net_device *group_dev,
+ bool joining)
+{
+}
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 29ef6f99e43d..2de87e58b12b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -249,6 +249,13 @@ enum bpf_func_id {
* Return: 0 on success
*/
BPF_FUNC_get_current_comm,
+
+ /**
+ * bpf_get_cgroup_classid(skb) - retrieve a proc's classid
+ * @skb: pointer to skb
+ * Return: classid if != 0
+ */
+ BPF_FUNC_get_cgroup_classid,
__BPF_FUNC_MAX_ID,
};
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a538cb1199a3..45e4757c6fd2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -281,6 +281,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
br_fdb_delete_by_port(br, NULL, 0, 1);
br_vlan_flush(br);
+ br_multicast_dev_del(br);
del_timer_sync(&br->gc_timer);
br_sysfs_delbr(br->dev);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5a44cd9473f2..0dd3cd90962c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -273,6 +273,8 @@ static void br_multicast_del_pg(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
+ br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
+ p->state);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
if (!mp->ports && !mp->mglist &&
@@ -914,6 +916,15 @@ void br_multicast_add_port(struct net_bridge_port *port)
void br_multicast_del_port(struct net_bridge_port *port)
{
+ struct net_bridge *br = port->br;
+ struct net_bridge_port_group *pg;
+ struct hlist_node *n;
+
+ /* Take care of the remaining groups, only perm ones should be left */
+ spin_lock_bh(&br->multicast_lock);
+ hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
+ br_multicast_del_pg(br, pg);
+ spin_unlock_bh(&br->multicast_lock);
del_timer_sync(&port->multicast_router_timer);
}
@@ -953,7 +964,8 @@ void br_multicast_disable_port(struct net_bridge_port *port)
spin_lock(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- br_multicast_del_pg(br, pg);
+ if (pg->state == MDB_TEMPORARY)
+ br_multicast_del_pg(br, pg);
if (!hlist_unhashed(&port->rlist))
hlist_del_init_rcu(&port->rlist);
@@ -1730,12 +1742,6 @@ void br_multicast_open(struct net_bridge *br)
void br_multicast_stop(struct net_bridge *br)
{
- struct net_bridge_mdb_htable *mdb;
- struct net_bridge_mdb_entry *mp;
- struct hlist_node *n;
- u32 ver;
- int i;
-
del_timer_sync(&br->multicast_router_timer);
del_timer_sync(&br->ip4_other_query.timer);
del_timer_sync(&br->ip4_own_query.timer);
@@ -1743,6 +1749,15 @@ void br_multicast_stop(struct net_bridge *br)
del_timer_sync(&br->ip6_other_query.timer);
del_timer_sync(&br->ip6_own_query.timer);
#endif
+}
+
+void br_multicast_dev_del(struct net_bridge *br)
+{
+ struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_entry *mp;
+ struct hlist_node *n;
+ u32 ver;
+ int i;
spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c73fd785654d..3ad1290528af 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -466,6 +466,7 @@ void br_multicast_disable_port(struct net_bridge_port *port);
void br_multicast_init(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
+void br_multicast_dev_del(struct net_bridge *br);
void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb);
void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
@@ -565,6 +566,10 @@ static inline void br_multicast_stop(struct net_bridge *br)
{
}
+static inline void br_multicast_dev_del(struct net_bridge *br)
+{
+}
+
static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index 8810b6bbebfe..2ee15afb412d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ /* Don't forward if offload device already forwarded */
+ if (skb->offload_fwd_mark &&
+ skb->offload_fwd_mark == dev->offload_fwd_mark) {
+ consume_skb(skb);
+ rc = NET_XMIT_SUCCESS;
+ goto out;
+ }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
diff --git a/net/core/filter.c b/net/core/filter.c
index be3098fb65e4..247450a5e387 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
+#include <net/cls_cgroup.h>
/**
* sk_filter - run a packet through a socket filter
@@ -1424,6 +1425,18 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return task_get_classid((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
+ .func = bpf_get_cgroup_classid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1461,6 +1474,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_clone_redirect:
return &bpf_clone_redirect_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
default:
return sk_filter_func_proto(func_id);
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4c9a0246ef48..32153c248959 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1360,8 +1360,6 @@ out:
static int __ipv6_dev_get_saddr(struct net *net,
struct ipv6_saddr_dst *dst,
- unsigned int prefs,
- const struct in6_addr *saddr,
struct inet6_dev *idev,
struct ipv6_saddr_score *scores,
int hiscore_idx)
@@ -1485,13 +1483,13 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
if (use_oif_addr) {
if (idev)
- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores, hiscore_idx);
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} else {
for_each_netdev_rcu(net, dev) {
idev = __in6_dev_get(dev);
if (!idev)
continue;
- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores, hiscore_idx);
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
}
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d5f7716662db..c5fc85286ef6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1023,6 +1023,8 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = dst->dev->ifindex;
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index ea611b216412..4c85bd3a750c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -30,35 +30,16 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
- u32 classid;
-
- classid = task_cls_state(current)->classid;
-
- /*
- * Due to the nature of the classifier it is required to ignore all
- * packets originating from softirq context as accessing `current'
- * would lead to false results.
- *
- * This test assumes that all callers of dev_queue_xmit() explicitely
- * disable bh. Knowing this, it is possible to detect softirq based
- * calls by looking at the number of nested bh disable calls because
- * softirqs always disables bh.
- */
- if (in_serving_softirq()) {
- /* If there is an sk_classid we'll use that. */
- if (!skb->sk)
- return -1;
- classid = skb->sk->sk_classid;
- }
+ u32 classid = task_get_classid(skb);
if (!classid)
return -1;
-
if (!tcf_em_tree_match(skb, &head->ematches, NULL))
return -1;
res->classid = classid;
res->class = 0;
+
return tcf_exts_exec(skb, &head->exts, res);
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 9f2add3cba26..33bafa2e703e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
- if (nhsel > 0) {
- if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+ if (nhsel > 0 &&
+ !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
- if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
- attr.u.ppid.id_len))
- return NULL;
- }
prev_attr = attr;
}
@@ -1043,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+ struct switchdev_attr a_attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
+ struct switchdev_attr b_attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
+
+ if (switchdev_port_attr_get(a, &a_attr) ||
+ switchdev_port_attr_get(b, &b_attr))
+ return false;
+
+ return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+ struct net_device *group_dev)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+ if (lower_dev == dev)
+ continue;
+ if (switchdev_port_same_parent_id(dev, lower_dev))
+ return lower_dev->offload_fwd_mark;
+ return switchdev_port_fwd_mark_get(dev, lower_dev);
+ }
+
+ return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+ if (lower_dev->offload_fwd_mark == old_mark) {
+ if (!*reset_mark)
+ *reset_mark = lower_dev->ifindex;
+ lower_dev->offload_fwd_mark = *reset_mark;
+ }
+ switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+ }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex. A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ * sw1p1 ifindex=2 mark=2
+ * sw1p2 ifindex=3 mark=2
+ * sw2p1 ifindex=4 mark=5
+ * sw2p2 ifindex=5 mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ * sw1p1 ifindex=2 mark=2
+ * sw1p2 ifindex=3 mark=2
+ * sw2p1 ifindex=4 mark=4
+ * sw2p2 ifindex=5 mark=5
+ */
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+ struct net_device *group_dev,
+ bool joining)
+{
+ u32 mark = dev->ifindex;
+ u32 reset_mark = 0;
+
+ if (group_dev && joining) {
+ mark = switchdev_port_fwd_mark_get(dev, group_dev);
+ } else if (group_dev && !joining) {
+ if (dev->offload_fwd_mark == mark)
+ /* Ohoh, this port was the mark reference port,
+ * but it's leaving the group, so reset the
+ * mark for the remaining ports in the group.
+ */
+ switchdev_port_fwd_mark_reset(group_dev, mark,
+ &reset_mark);
+ }
+
+ dev->offload_fwd_mark = mark;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a816382fc8af..8b010c976b2f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -316,6 +316,29 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr,
}
}
+void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr)
+{
+ u16 last = msg_last_bcast(hdr);
+ int mtyp = msg_type(hdr);
+
+ if (unlikely(msg_user(hdr) != LINK_PROTOCOL))
+ return;
+ if (mtyp == STATE_MSG) {
+ tipc_bclink_update_link_state(n, last);
+ return;
+ }
+ /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization,
+ * and transfer synch info in LINK_PROTOCOL messages.
+ */
+ if (tipc_node_is_up(n))
+ return;
+ if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG))
+ return;
+ n->bclink.last_sent = last;
+ n->bclink.last_in = last;
+ n->bclink.oos_state = 0;
+}
+
/**
* bclink_peek_nack - monitor retransmission requests sent by other nodes
*
@@ -358,10 +381,9 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
/* Prepare clone of message for local node */
skb = tipc_msg_reassemble(list);
- if (unlikely(!skb)) {
- __skb_queue_purge(list);
+ if (unlikely(!skb))
return -EHOSTUNREACH;
- }
+
/* Broadcast to all nodes */
if (likely(bclink)) {
tipc_bclink_lock(net);
@@ -413,7 +435,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
* all nodes in the cluster don't ACK at the same time
*/
if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_proto_xmit(node->active_links[node->addr & 1],
+ tipc_link_proto_xmit(node_active_link(node, node->addr),
STATE_MSG, 0, 0, 0, 0);
tn->bcl->stats.sent_acks++;
}
@@ -925,7 +947,6 @@ int tipc_bclink_init(struct net *net)
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
bcl->bearer_id = MAX_BEARERS;
rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer);
- bcl->state = WORKING_WORKING;
bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg;
msg_set_prevnode(bcl->pmsg, tn->own_addr);
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 3c290a48f720..d74c69bcf60b 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -133,5 +133,6 @@ void tipc_bclink_wakeup_users(struct net *net);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
void tipc_bclink_input(struct net *net);
+void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 00bc0e620532..eae58a6b121c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -470,6 +470,32 @@ void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
rcu_read_unlock();
}
+/* tipc_bearer_xmit() -send buffer to destination over bearer
+ */
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr *dst)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_bearer *b;
+ struct sk_buff *skb, *tmp;
+
+ if (skb_queue_empty(xmitq))
+ return;
+
+ rcu_read_lock();
+ b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ if (likely(b)) {
+ skb_queue_walk_safe(xmitq, skb, tmp) {
+ __skb_dequeue(xmitq);
+ b->media->send_msg(net, skb, b, dst);
+ /* Until we remove cloning in tipc_l2_send_msg(): */
+ kfree_skb(skb);
+ }
+ }
+ rcu_read_unlock();
+}
+
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
* @buf: the received packet
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index dc714d977768..6426f242f626 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -217,5 +217,8 @@ void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest);
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr *dst);
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 0fcf133d5cb7..f4ed67778c54 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -129,6 +129,11 @@ static inline int less(u16 left, u16 right)
return less_eq(left, right) && (mod(right) != mod(left));
}
+static inline int in_range(u16 val, u16 min, u16 max)
+{
+ return !less(val, min) && !more(val, max);
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 967e292f53c8..164d08907d6f 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -35,7 +35,7 @@
*/
#include "core.h"
-#include "link.h"
+#include "node.h"
#include "discover.h"
/* min delay during bearer start up */
@@ -125,7 +125,6 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
- struct tipc_link *link;
struct tipc_media_addr maddr;
struct sk_buff *rbuf;
struct tipc_msg *msg = buf_msg(buf);
@@ -170,13 +169,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
return;
tipc_node_lock(node);
node->capabilities = caps;
- link = node->links[bearer->identity];
/* Prepare to validate requesting node's signature and media address */
sign_match = (signature == node->signature);
- addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr));
- link_up = link && tipc_link_is_up(link);
-
+ tipc_node_check_dest(node, bearer, &link_up, &addr_match, &maddr);
/* These three flags give us eight permutations: */
@@ -239,16 +235,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
if (accept_sign)
node->signature = signature;
- if (accept_addr) {
- if (!link)
- link = tipc_link_create(node, bearer, &maddr);
- if (link) {
- memcpy(&link->media_addr, &maddr, sizeof(maddr));
- tipc_link_reset(link);
- } else {
- respond = false;
- }
- }
+ if (accept_addr && !tipc_node_update_dest(node, bearer, &maddr))
+ respond = false;
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index eaa9fe54b4ae..55b675d20de8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -77,36 +77,70 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
};
/*
- * Out-of-range value for link session numbers
+ * Interval between NACKs when packets arrive out of order
*/
-#define INVALID_SESSION 0x10000
-
+#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
/*
- * Link state events:
+ * Out-of-range value for link session numbers
*/
-#define STARTING_EVT 856384768 /* link processing trigger */
-#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
-#define SILENCE_EVT 560817u /* timer dicovered silence from peer */
+#define WILDCARD_SESSION 0x10000
-/*
- * State value stored in 'failover_pkts'
+/* State value stored in 'failover_pkts'
*/
#define FIRST_FAILOVER 0xffffu
-static void link_handle_out_of_seq_msg(struct tipc_link *link,
- struct sk_buff *skb);
-static void tipc_link_proto_rcv(struct tipc_link *link,
- struct sk_buff *skb);
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol);
-static void link_state_event(struct tipc_link *l_ptr, u32 event);
+/* Link FSM states and events:
+ */
+enum {
+ TIPC_LINK_WORKING,
+ TIPC_LINK_PROBING,
+ TIPC_LINK_RESETTING,
+ TIPC_LINK_ESTABLISHING
+};
+
+enum {
+ PEER_RESET_EVT = RESET_MSG,
+ ACTIVATE_EVT = ACTIVATE_MSG,
+ TRAFFIC_EVT, /* Any other valid msg from peer */
+ SILENCE_EVT /* Peer was silent during last timer interval*/
+};
+
+/* Link FSM state checking routines
+ */
+static int link_working(struct tipc_link *l)
+{
+ return l->state == TIPC_LINK_WORKING;
+}
+
+static int link_probing(struct tipc_link *l)
+{
+ return l->state == TIPC_LINK_PROBING;
+}
+
+static int link_resetting(struct tipc_link *l)
+{
+ return l->state == TIPC_LINK_RESETTING;
+}
+
+static int link_establishing(struct tipc_link *l)
+{
+ return l->state == TIPC_LINK_ESTABLISHING;
+}
+
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ u16 rcvgap, int tolerance, int priority,
+ struct sk_buff_head *xmitq);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static void tipc_link_sync_xmit(struct tipc_link *l);
+static void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb);
static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb);
static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb);
-static void link_set_timer(struct tipc_link *link, unsigned long time);
+
/*
* Simple link routines
*/
@@ -115,26 +149,13 @@ static unsigned int align(unsigned int i)
return (i + 3) & ~3u;
}
-static void tipc_link_release(struct kref *kref)
-{
- kfree(container_of(kref, struct tipc_link, ref));
-}
-
-static void tipc_link_get(struct tipc_link *l_ptr)
-{
- kref_get(&l_ptr->ref);
-}
-
-static void tipc_link_put(struct tipc_link *l_ptr)
-{
- kref_put(&l_ptr->ref, tipc_link_release);
-}
-
static struct tipc_link *tipc_parallel_link(struct tipc_link *l)
{
- if (l->owner->active_links[0] != l)
- return l->owner->active_links[0];
- return l->owner->active_links[1];
+ struct tipc_node *n = l->owner;
+
+ if (node_active_link(n, 0) != l)
+ return node_active_link(n, 0);
+ return node_active_link(n, 1);
}
/*
@@ -144,74 +165,14 @@ int tipc_link_is_up(struct tipc_link *l_ptr)
{
if (!l_ptr)
return 0;
- return link_working_working(l_ptr) || link_working_unknown(l_ptr);
+ return link_working(l_ptr) || link_probing(l_ptr);
}
-int tipc_link_is_active(struct tipc_link *l_ptr)
+int tipc_link_is_active(struct tipc_link *l)
{
- return (l_ptr->owner->active_links[0] == l_ptr) ||
- (l_ptr->owner->active_links[1] == l_ptr);
-}
-
-/**
- * link_timeout - handle expiration of link timer
- * @l_ptr: pointer to link
- */
-static void link_timeout(unsigned long data)
-{
- struct tipc_link *l_ptr = (struct tipc_link *)data;
- struct sk_buff *skb;
-
- tipc_node_lock(l_ptr->owner);
-
- /* update counters used in statistical profiling of send traffic */
- l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq);
- l_ptr->stats.queue_sz_counts++;
-
- skb = skb_peek(&l_ptr->transmq);
- if (skb) {
- struct tipc_msg *msg = buf_msg(skb);
- u32 length = msg_size(msg);
-
- if ((msg_user(msg) == MSG_FRAGMENTER) &&
- (msg_type(msg) == FIRST_FRAGMENT)) {
- length = msg_size(msg_get_wrapped(msg));
- }
- if (length) {
- l_ptr->stats.msg_lengths_total += length;
- l_ptr->stats.msg_length_counts++;
- if (length <= 64)
- l_ptr->stats.msg_length_profile[0]++;
- else if (length <= 256)
- l_ptr->stats.msg_length_profile[1]++;
- else if (length <= 1024)
- l_ptr->stats.msg_length_profile[2]++;
- else if (length <= 4096)
- l_ptr->stats.msg_length_profile[3]++;
- else if (length <= 16384)
- l_ptr->stats.msg_length_profile[4]++;
- else if (length <= 32768)
- l_ptr->stats.msg_length_profile[5]++;
- else
- l_ptr->stats.msg_length_profile[6]++;
- }
- }
-
- /* do all other link processing performed on a periodic basis */
- if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner))
- link_state_event(l_ptr, SILENCE_EVT);
- l_ptr->silent_intv_cnt++;
- if (skb_queue_len(&l_ptr->backlogq))
- tipc_link_push_packets(l_ptr);
- link_set_timer(l_ptr, l_ptr->keepalive_intv);
- tipc_node_unlock(l_ptr->owner);
- tipc_link_put(l_ptr);
-}
+ struct tipc_node *n = l->owner;
-static void link_set_timer(struct tipc_link *link, unsigned long time)
-{
- if (!mod_timer(&link->timer, jiffies + time))
- tipc_link_get(link);
+ return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l);
}
/**
@@ -224,7 +185,9 @@ static void link_set_timer(struct tipc_link *link, unsigned long time)
*/
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr)
+ const struct tipc_media_addr *media_addr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq)
{
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
struct tipc_link *l_ptr;
@@ -240,7 +203,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
return NULL;
}
- if (n_ptr->links[b_ptr->identity]) {
+ if (n_ptr->links[b_ptr->identity].link) {
tipc_addr_string_fill(addr_string, n_ptr->addr);
pr_err("Attempt to establish second link on <%s> to %s\n",
b_ptr->name, addr_string);
@@ -252,7 +215,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
pr_warn("Link creation failed, no memory\n");
return NULL;
}
- kref_init(&l_ptr->ref);
l_ptr->addr = peer;
if_name = strchr(b_ptr->name, ':') + 1;
sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
@@ -263,10 +225,10 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
/* note: peer i/f name is updated by reset/activate message */
memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
l_ptr->owner = n_ptr;
- l_ptr->peer_session = INVALID_SESSION;
+ l_ptr->peer_session = WILDCARD_SESSION;
l_ptr->bearer_id = b_ptr->identity;
- link_set_supervision_props(l_ptr, b_ptr->tolerance);
- l_ptr->state = RESET_UNKNOWN;
+ l_ptr->tolerance = b_ptr->tolerance;
+ l_ptr->state = TIPC_LINK_RESETTING;
l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
msg = l_ptr->pmsg;
@@ -286,13 +248,11 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
__skb_queue_head_init(&l_ptr->backlogq);
__skb_queue_head_init(&l_ptr->deferdq);
skb_queue_head_init(&l_ptr->wakeupq);
- skb_queue_head_init(&l_ptr->inputq);
- skb_queue_head_init(&l_ptr->namedq);
+ l_ptr->inputq = inputq;
+ l_ptr->namedq = namedq;
+ skb_queue_head_init(l_ptr->inputq);
link_reset_statistics(l_ptr);
tipc_node_attach_link(n_ptr, l_ptr);
- setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr);
- link_state_event(l_ptr, STARTING_EVT);
-
return l_ptr;
}
@@ -303,13 +263,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
void tipc_link_delete(struct tipc_link *l)
{
tipc_link_reset(l);
- if (del_timer(&l->timer))
- tipc_link_put(l);
- l->flags |= LINK_STOPPED;
- /* Delete link now, or when timer is finished: */
tipc_link_reset_fragments(l);
tipc_node_detach_link(l->owner, l);
- tipc_link_put(l);
}
void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
@@ -321,7 +276,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
rcu_read_lock();
list_for_each_entry_rcu(node, &tn->node_list, list) {
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (link)
tipc_link_delete(link);
tipc_node_unlock(node);
@@ -329,12 +284,219 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
rcu_read_unlock();
}
+/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints.
+ *
+ * Give a newly added peer node the sequence number where it should
+ * start receiving and acking broadcast packets.
+ */
+static void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ struct sk_buff_head list;
+
+ skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
+ 0, l->addr, link_own_addr(l), 0, 0, 0);
+ if (!skb)
+ return;
+ __skb_queue_head_init(&list);
+ __skb_queue_tail(&list, skb);
+ tipc_link_xmit(l, &list, xmitq);
+}
+
+/**
+ * tipc_link_fsm_evt - link finite state machine
+ * @l: pointer to link
+ * @evt: state machine event to be processed
+ * @xmitq: queue to prepend created protocol message, if any
+ */
+static int tipc_link_fsm_evt(struct tipc_link *l, int evt,
+ struct sk_buff_head *xmitq)
+{
+ int mtyp = 0, rc = 0;
+ struct tipc_link *pl;
+ enum {
+ LINK_RESET = 1,
+ LINK_ACTIVATE = (1 << 1),
+ SND_PROBE = (1 << 2),
+ SND_STATE = (1 << 3),
+ SND_RESET = (1 << 4),
+ SND_ACTIVATE = (1 << 5),
+ SND_BCAST_SYNC = (1 << 6)
+ } actions = 0;
+
+ if (l->exec_mode == TIPC_LINK_BLOCKED)
+ return rc;
+
+ switch (l->state) {
+ case TIPC_LINK_WORKING:
+ switch (evt) {
+ case TRAFFIC_EVT:
+ case ACTIVATE_EVT:
+ break;
+ case SILENCE_EVT:
+ l->state = TIPC_LINK_PROBING;
+ actions |= SND_PROBE;
+ break;
+ case PEER_RESET_EVT:
+ actions |= LINK_RESET | SND_ACTIVATE;
+ break;
+ default:
+ pr_debug("%s%u WORKING\n", link_unk_evt, evt);
+ }
+ break;
+ case TIPC_LINK_PROBING:
+ switch (evt) {
+ case TRAFFIC_EVT:
+ case ACTIVATE_EVT:
+ l->state = TIPC_LINK_WORKING;
+ break;
+ case PEER_RESET_EVT:
+ actions |= LINK_RESET | SND_ACTIVATE;
+ break;
+ case SILENCE_EVT:
+ if (l->silent_intv_cnt <= l->abort_limit) {
+ actions |= SND_PROBE;
+ break;
+ }
+ actions |= LINK_RESET | SND_RESET;
+ break;
+ default:
+ pr_err("%s%u PROBING\n", link_unk_evt, evt);
+ }
+ break;
+ case TIPC_LINK_RESETTING:
+ switch (evt) {
+ case TRAFFIC_EVT:
+ break;
+ case ACTIVATE_EVT:
+ pl = node_active_link(l->owner, 0);
+ if (pl && link_probing(pl))
+ break;
+ actions |= LINK_ACTIVATE;
+ if (!l->owner->working_links)
+ actions |= SND_BCAST_SYNC;
+ break;
+ case PEER_RESET_EVT:
+ l->state = TIPC_LINK_ESTABLISHING;
+ actions |= SND_ACTIVATE;
+ break;
+ case SILENCE_EVT:
+ actions |= SND_RESET;
+ break;
+ default:
+ pr_err("%s%u in RESETTING\n", link_unk_evt, evt);
+ }
+ break;
+ case TIPC_LINK_ESTABLISHING:
+ switch (evt) {
+ case TRAFFIC_EVT:
+ case ACTIVATE_EVT:
+ pl = node_active_link(l->owner, 0);
+ if (pl && link_probing(pl))
+ break;
+ actions |= LINK_ACTIVATE;
+ if (!l->owner->working_links)
+ actions |= SND_BCAST_SYNC;
+ break;
+ case PEER_RESET_EVT:
+ break;
+ case SILENCE_EVT:
+ actions |= SND_ACTIVATE;
+ break;
+ default:
+ pr_err("%s%u ESTABLISHING\n", link_unk_evt, evt);
+ }
+ break;
+ default:
+ pr_err("Unknown link state %u/%u\n", l->state, evt);
+ }
+
+ /* Perform actions as decided by FSM */
+ if (actions & LINK_RESET) {
+ l->exec_mode = TIPC_LINK_BLOCKED;
+ rc |= TIPC_LINK_DOWN_EVT;
+ }
+ if (actions & LINK_ACTIVATE) {
+ l->exec_mode = TIPC_LINK_OPEN;
+ rc |= TIPC_LINK_UP_EVT;
+ }
+ if (actions & (SND_STATE | SND_PROBE))
+ mtyp = STATE_MSG;
+ if (actions & SND_RESET)
+ mtyp = RESET_MSG;
+ if (actions & SND_ACTIVATE)
+ mtyp = ACTIVATE_MSG;
+ if (actions & (SND_PROBE | SND_STATE | SND_RESET | SND_ACTIVATE))
+ tipc_link_build_proto_msg(l, mtyp, actions & SND_PROBE,
+ 0, 0, 0, xmitq);
+ if (actions & SND_BCAST_SYNC)
+ tipc_link_build_bcast_sync_msg(l, xmitq);
+ return rc;
+}
+
+/* link_profile_stats - update statistical profiling of traffic
+ */
+static void link_profile_stats(struct tipc_link *l)
+{
+ struct sk_buff *skb;
+ struct tipc_msg *msg;
+ int length;
+
+ /* Update counters used in statistical profiling of send traffic */
+ l->stats.accu_queue_sz += skb_queue_len(&l->transmq);
+ l->stats.queue_sz_counts++;
+
+ skb = skb_peek(&l->transmq);
+ if (!skb)
+ return;
+ msg = buf_msg(skb);
+ length = msg_size(msg);
+
+ if (msg_user(msg) == MSG_FRAGMENTER) {
+ if (msg_type(msg) != FIRST_FRAGMENT)
+ return;
+ length = msg_size(msg_get_wrapped(msg));
+ }
+ l->stats.msg_lengths_total += length;
+ l->stats.msg_length_counts++;
+ if (length <= 64)
+ l->stats.msg_length_profile[0]++;
+ else if (length <= 256)
+ l->stats.msg_length_profile[1]++;
+ else if (length <= 1024)
+ l->stats.msg_length_profile[2]++;
+ else if (length <= 4096)
+ l->stats.msg_length_profile[3]++;
+ else if (length <= 16384)
+ l->stats.msg_length_profile[4]++;
+ else if (length <= 32768)
+ l->stats.msg_length_profile[5]++;
+ else
+ l->stats.msg_length_profile[6]++;
+}
+
+/* tipc_link_timeout - perform periodic task as instructed from node timeout
+ */
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+ int rc = 0;
+
+ link_profile_stats(l);
+ if (l->silent_intv_cnt)
+ rc = tipc_link_fsm_evt(l, SILENCE_EVT, xmitq);
+ else if (link_working(l) && tipc_bclink_acks_missing(l->owner))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ l->silent_intv_cnt++;
+ return rc;
+}
+
/**
* link_schedule_user - schedule a message sender for wakeup after congestion
* @link: congested link
* @list: message that was attempted sent
* Create pseudo msg to send back to user when congestion abates
- * Only consumes message if there is an error
+ * Does not consume buffer list
*/
static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
{
@@ -347,8 +509,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
/* This really cannot happen... */
if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
- tipc_link_reset(link);
- goto err;
+ return -ENOBUFS;
}
/* Non-blocking sender: */
if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending)
@@ -358,15 +519,12 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
addr, addr, oport, 0, 0);
if (!skb)
- goto err;
+ return -ENOBUFS;
TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list);
TIPC_SKB_CB(skb)->chain_imp = imp;
skb_queue_tail(&link->wakeupq, skb);
link->stats.link_congs++;
return -ELINKCONG;
-err:
- __skb_queue_purge(list);
- return -ENOBUFS;
}
/**
@@ -388,8 +546,8 @@ void link_prepare_wakeup(struct tipc_link *l)
if ((pnd[imp] + l->backlog[imp].len) >= lim)
break;
skb_unlink(skb, &l->wakeupq);
- skb_queue_tail(&l->inputq, skb);
- l->owner->inputq = &l->inputq;
+ skb_queue_tail(l->inputq, skb);
+ l->owner->inputq = l->inputq;
l->owner->action_flags |= TIPC_MSG_EVT;
}
}
@@ -436,21 +594,22 @@ void tipc_link_reset(struct tipc_link *l_ptr)
msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff));
/* Link is down, accept any session */
- l_ptr->peer_session = INVALID_SESSION;
+ l_ptr->peer_session = WILDCARD_SESSION;
/* Prepare for renewed mtu size negotiation */
l_ptr->mtu = l_ptr->advertised_mtu;
- l_ptr->state = RESET_UNKNOWN;
+ l_ptr->state = TIPC_LINK_RESETTING;
- if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET))
+ if ((prev_state == TIPC_LINK_RESETTING) ||
+ (prev_state == TIPC_LINK_ESTABLISHING))
return;
- tipc_node_link_down(l_ptr->owner, l_ptr);
+ tipc_node_link_down(l_ptr->owner, l_ptr->bearer_id);
tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr);
if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) {
- l_ptr->flags |= LINK_FAILINGOVER;
+ l_ptr->exec_mode = TIPC_LINK_BLOCKED;
l_ptr->failover_checkpt = l_ptr->rcv_nxt;
pl->failover_pkts = FIRST_FAILOVER;
pl->failover_checkpt = l_ptr->rcv_nxt;
@@ -462,7 +621,7 @@ void tipc_link_reset(struct tipc_link *l_ptr)
__skb_queue_purge(&l_ptr->transmq);
__skb_queue_purge(&l_ptr->deferdq);
if (!owner->inputq)
- owner->inputq = &l_ptr->inputq;
+ owner->inputq = l_ptr->inputq;
skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq);
if (!skb_queue_empty(owner->inputq))
owner->action_flags |= TIPC_MSG_EVT;
@@ -470,173 +629,32 @@ void tipc_link_reset(struct tipc_link *l_ptr)
l_ptr->reasm_buf = NULL;
l_ptr->rcv_unacked = 0;
l_ptr->snd_nxt = 1;
+ l_ptr->rcv_nxt = 1;
l_ptr->silent_intv_cnt = 0;
+ l_ptr->stats.recv_info = 0;
l_ptr->stale_count = 0;
link_reset_statistics(l_ptr);
}
-static void link_activate(struct tipc_link *link)
+void tipc_link_activate(struct tipc_link *link)
{
struct tipc_node *node = link->owner;
link->rcv_nxt = 1;
link->stats.recv_info = 1;
link->silent_intv_cnt = 0;
- tipc_node_link_up(node, link);
+ link->state = TIPC_LINK_WORKING;
+ link->exec_mode = TIPC_LINK_OPEN;
+ tipc_node_link_up(node, link->bearer_id);
tipc_bearer_add_dest(node->net, link->bearer_id, link->addr);
}
/**
- * link_state_event - link finite state machine
- * @l_ptr: pointer to link
- * @event: state machine event to process
- */
-static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
-{
- struct tipc_link *other;
- unsigned long timer_intv = l_ptr->keepalive_intv;
-
- if (l_ptr->flags & LINK_STOPPED)
- return;
-
- if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
- return; /* Not yet. */
-
- if (l_ptr->flags & LINK_FAILINGOVER)
- return;
-
- switch (l_ptr->state) {
- case WORKING_WORKING:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- l_ptr->silent_intv_cnt = 0;
- break;
- case SILENCE_EVT:
- if (!l_ptr->silent_intv_cnt) {
- if (tipc_bclink_acks_missing(l_ptr->owner))
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 0, 0, 0, 0);
- break;
- }
- l_ptr->state = WORKING_UNKNOWN;
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- break;
- case RESET_MSG:
- pr_debug("%s<%s>, requested by peer\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- default:
- pr_debug("%s%u in WW state\n", link_unk_evt, event);
- }
- break;
- case WORKING_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- l_ptr->state = WORKING_WORKING;
- l_ptr->silent_intv_cnt = 0;
- break;
- case RESET_MSG:
- pr_debug("%s<%s>, requested by peer while probing\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- case SILENCE_EVT:
- if (!l_ptr->silent_intv_cnt) {
- l_ptr->state = WORKING_WORKING;
- if (tipc_bclink_acks_missing(l_ptr->owner))
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 0, 0, 0, 0);
- } else if (l_ptr->silent_intv_cnt <
- l_ptr->abort_limit) {
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 1, 0, 0, 0);
- } else { /* Link has failed */
- pr_debug("%s<%s>, peer not responding\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_UNKNOWN;
- tipc_link_proto_xmit(l_ptr, RESET_MSG,
- 0, 0, 0, 0);
- }
- break;
- default:
- pr_err("%s%u in WU state\n", link_unk_evt, event);
- }
- break;
- case RESET_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- break;
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- link_activate(l_ptr);
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- if (l_ptr->owner->working_links == 1)
- tipc_link_sync_xmit(l_ptr);
- break;
- case RESET_MSG:
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 1, 0, 0, 0);
- break;
- case STARTING_EVT:
- l_ptr->flags |= LINK_STARTED;
- link_set_timer(l_ptr, timer_intv);
- break;
- case SILENCE_EVT:
- tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0);
- break;
- default:
- pr_err("%s%u in RU state\n", link_unk_evt, event);
- }
- break;
- case RESET_RESET:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- link_activate(l_ptr);
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- if (l_ptr->owner->working_links == 1)
- tipc_link_sync_xmit(l_ptr);
- break;
- case RESET_MSG:
- break;
- case SILENCE_EVT:
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- default:
- pr_err("%s%u in RR state\n", link_unk_evt, event);
- }
- break;
- default:
- pr_err("Unknown link state %u/%u\n", l_ptr->state, event);
- }
-}
-
-/**
* __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked
* @link: link to use
* @list: chain of buffers containing message
*
- * Consumes the buffer chain, except when returning -ELINKCONG,
- * since the caller then may want to make more send attempts.
+ * Consumes the buffer chain, except when returning an error code,
* Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
*/
@@ -660,10 +678,9 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
return link_schedule_user(link, list);
}
- if (unlikely(msg_size(msg) > mtu)) {
- __skb_queue_purge(list);
+ if (unlikely(msg_size(msg) > mtu))
return -EMSGSIZE;
- }
+
/* Prepare each packet for sending, and add to relevant queue: */
while (skb_queue_len(list)) {
skb = skb_peek(list);
@@ -700,101 +717,90 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
return 0;
}
-static void skb2list(struct sk_buff *skb, struct sk_buff_head *list)
-{
- skb_queue_head_init(list);
- __skb_queue_tail(list, skb);
-}
-
-static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb)
-{
- struct sk_buff_head head;
-
- skb2list(skb, &head);
- return __tipc_link_xmit(link->owner->net, link, &head);
-}
-
-/* tipc_link_xmit_skb(): send single buffer to destination
- * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE
- * messages, which will not be rejected
- * The only exception is datagram messages rerouted after secondary
- * lookup, which are rare and safe to dispose of anyway.
- * TODO: Return real return value, and let callers use
- * tipc_wait_for_sendpkt() where applicable
- */
-int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
- u32 selector)
-{
- struct sk_buff_head head;
- int rc;
-
- skb2list(skb, &head);
- rc = tipc_link_xmit(net, &head, dnode, selector);
- if (rc == -ELINKCONG)
- kfree_skb(skb);
- return 0;
-}
-
/**
- * tipc_link_xmit() is the general link level function for message sending
- * @net: the applicable net namespace
+ * tipc_link_xmit(): enqueue buffer list according to queue situation
+ * @link: link to use
* @list: chain of buffers containing message
- * @dsz: amount of user data to be sent
- * @dnode: address of destination node
- * @selector: a number used for deterministic link selection
- * Consumes the buffer chain, except when returning -ELINKCONG
- * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ * @xmitq: returned list of packets to be sent by caller
+ *
+ * Consumes the buffer chain, except when returning -ELINKCONG,
+ * since the caller then may want to make more send attempts.
+ * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
+ * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
*/
-int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
- u32 selector)
+int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link *link = NULL;
- struct tipc_node *node;
- int rc = -EHOSTUNREACH;
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ unsigned int maxwin = l->window;
+ unsigned int i, imp = msg_importance(hdr);
+ unsigned int mtu = l->mtu;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ u16 bc_last_in = l->owner->bclink.last_in;
+ struct sk_buff_head *transmq = &l->transmq;
+ struct sk_buff_head *backlogq = &l->backlogq;
+ struct sk_buff *skb, *_skb, *bskb;
- node = tipc_node_find(net, dnode);
- if (node) {
- tipc_node_lock(node);
- link = node->active_links[selector & 1];
- if (link)
- rc = __tipc_link_xmit(net, link, list);
- tipc_node_unlock(node);
- tipc_node_put(node);
+ /* Match msg importance against this and all higher backlog limits: */
+ for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+ if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
+ return link_schedule_user(l, list);
}
- if (link)
- return rc;
+ if (unlikely(msg_size(hdr) > mtu))
+ return -EMSGSIZE;
- if (likely(in_own_node(net, dnode))) {
- tipc_sk_rcv(net, list);
- return 0;
- }
+ /* Prepare each packet for sending, and add to relevant queue: */
+ while (skb_queue_len(list)) {
+ skb = skb_peek(list);
+ hdr = buf_msg(skb);
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_last_in);
- __skb_queue_purge(list);
- return rc;
+ if (likely(skb_queue_len(transmq) < maxwin)) {
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ return -ENOBUFS;
+ __skb_dequeue(list);
+ __skb_queue_tail(transmq, skb);
+ __skb_queue_tail(xmitq, _skb);
+ l->rcv_unacked = 0;
+ seqno++;
+ continue;
+ }
+ if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) {
+ kfree_skb(__skb_dequeue(list));
+ l->stats.sent_bundled++;
+ continue;
+ }
+ if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) {
+ kfree_skb(__skb_dequeue(list));
+ __skb_queue_tail(backlogq, bskb);
+ l->backlog[msg_importance(buf_msg(bskb))].len++;
+ l->stats.sent_bundled++;
+ l->stats.sent_bundles++;
+ continue;
+ }
+ l->backlog[imp].len += skb_queue_len(list);
+ skb_queue_splice_tail_init(list, backlogq);
+ }
+ l->snd_nxt = seqno;
+ return 0;
}
-/*
- * tipc_link_sync_xmit - synchronize broadcast link endpoints.
- *
- * Give a newly added peer node the sequence number where it should
- * start receiving and acking broadcast packets.
- *
- * Called with node locked
- */
-static void tipc_link_sync_xmit(struct tipc_link *link)
+static void skb2list(struct sk_buff *skb, struct sk_buff_head *list)
{
- struct sk_buff *skb;
- struct tipc_msg *msg;
+ skb_queue_head_init(list);
+ __skb_queue_tail(list, skb);
+}
- skb = tipc_buf_acquire(INT_H_SIZE);
- if (!skb)
- return;
+static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb)
+{
+ struct sk_buff_head head;
- msg = buf_msg(skb);
- tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG,
- INT_H_SIZE, link->addr);
- msg_set_last_bcast(msg, link->owner->bclink.acked);
- __tipc_link_xmit_skb(link, skb);
+ skb2list(skb, &head);
+ return __tipc_link_xmit(link->owner->net, link, &head);
}
/*
@@ -847,6 +853,34 @@ void tipc_link_push_packets(struct tipc_link *link)
link->snd_nxt = seqno;
}
+void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb, *_skb;
+ struct tipc_msg *hdr;
+ u16 seqno = l->snd_nxt;
+ u16 ack = l->rcv_nxt - 1;
+
+ while (skb_queue_len(&l->transmq) < l->window) {
+ skb = skb_peek(&l->backlogq);
+ if (!skb)
+ break;
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ break;
+ __skb_dequeue(&l->backlogq);
+ hdr = buf_msg(skb);
+ l->backlog[msg_importance(hdr)].len--;
+ __skb_queue_tail(&l->transmq, skb);
+ __skb_queue_tail(xmitq, _skb);
+ msg_set_ack(hdr, ack);
+ msg_set_seqno(hdr, seqno);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ l->rcv_unacked = 0;
+ seqno++;
+ }
+ l->snd_nxt = seqno;
+}
+
void tipc_link_reset_all(struct tipc_node *node)
{
char addr_string[16];
@@ -858,9 +892,9 @@ void tipc_link_reset_all(struct tipc_node *node)
tipc_addr_string_fill(addr_string, node->addr));
for (i = 0; i < MAX_BEARERS; i++) {
- if (node->links[i]) {
- link_print(node->links[i], "Resetting link\n");
- tipc_link_reset(node->links[i]);
+ if (node->links[i].link) {
+ link_print(node->links[i].link, "Resetting link\n");
+ tipc_link_reset(node->links[i].link);
}
}
@@ -877,9 +911,13 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
if (l_ptr->addr) {
/* Handle failure on standard link */
- link_print(l_ptr, "Resetting link\n");
+ link_print(l_ptr, "Resetting link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(msg), msg_type(msg), msg_size(msg),
+ msg_errcode(msg));
+ pr_info("sqno %u, prev: %x, src: %x\n",
+ msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
tipc_link_reset(l_ptr);
-
} else {
/* Handle failure on broadcast link */
struct tipc_node *n_ptr;
@@ -940,6 +978,41 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
}
}
+static int tipc_link_retransm(struct tipc_link *l, int retransm,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
+ struct tipc_msg *hdr;
+
+ if (!skb)
+ return 0;
+
+ /* Detect repeated retransmit failures on same packet */
+ if (likely(l->last_retransm != buf_seqno(skb))) {
+ l->last_retransm = buf_seqno(skb);
+ l->stale_count = 1;
+ } else if (++l->stale_count > 100) {
+ link_retransmit_failure(l, skb);
+ return TIPC_LINK_DOWN_EVT;
+ }
+ skb_queue_walk(&l->transmq, skb) {
+ if (!retransm)
+ return 0;
+ hdr = buf_msg(skb);
+ _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+ if (!_skb)
+ return 0;
+ hdr = buf_msg(_skb);
+ msg_set_ack(hdr, l->rcv_nxt - 1);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ _skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_tail(xmitq, _skb);
+ retransm--;
+ l->stats.retransmitted++;
+ }
+ return 0;
+}
+
/* link_synch(): check if all packets arrived before the synch
* point have been consumed
* Returns true if the parallel links are synched, otherwise false
@@ -959,168 +1032,13 @@ static bool link_synch(struct tipc_link *l)
/* Is it still in the input queue ? */
post_synch = mod(pl->rcv_nxt - l->synch_point) - 1;
- if (skb_queue_len(&pl->inputq) > post_synch)
+ if (skb_queue_len(pl->inputq) > post_synch)
return false;
synched:
- l->flags &= ~LINK_SYNCHING;
+ l->exec_mode = TIPC_LINK_OPEN;
return true;
}
-static void link_retrieve_defq(struct tipc_link *link,
- struct sk_buff_head *list)
-{
- u16 seq_no;
-
- if (skb_queue_empty(&link->deferdq))
- return;
-
- seq_no = buf_seqno(skb_peek(&link->deferdq));
- if (seq_no == link->rcv_nxt)
- skb_queue_splice_tail_init(&link->deferdq, list);
-}
-
-/**
- * tipc_rcv - process TIPC packets/messages arriving from off-node
- * @net: the applicable net namespace
- * @skb: TIPC packet
- * @b_ptr: pointer to bearer message arrived on
- *
- * Invoked with no locks held. Bearer pointer must point to a valid bearer
- * structure (i.e. cannot be NULL), but bearer can be inactive.
- */
-void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff_head head;
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct sk_buff *skb1, *tmp;
- struct tipc_msg *msg;
- u16 seq_no;
- u16 ackd;
- u32 released;
-
- skb2list(skb, &head);
-
- while ((skb = __skb_dequeue(&head))) {
- /* Ensure message is well-formed */
- if (unlikely(!tipc_msg_validate(skb)))
- goto discard;
-
- /* Handle arrival of a non-unicast link message */
- msg = buf_msg(skb);
- if (unlikely(msg_non_seq(msg))) {
- if (msg_user(msg) == LINK_CONFIG)
- tipc_disc_rcv(net, skb, b_ptr);
- else
- tipc_bclink_rcv(net, skb);
- continue;
- }
-
- /* Discard unicast link messages destined for another node */
- if (unlikely(!msg_short(msg) &&
- (msg_destnode(msg) != tn->own_addr)))
- goto discard;
-
- /* Locate neighboring node that sent message */
- n_ptr = tipc_node_find(net, msg_prevnode(msg));
- if (unlikely(!n_ptr))
- goto discard;
-
- tipc_node_lock(n_ptr);
- /* Locate unicast link endpoint that should handle message */
- l_ptr = n_ptr->links[b_ptr->identity];
- if (unlikely(!l_ptr))
- goto unlock;
-
- /* Verify that communication with node is currently allowed */
- if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) &&
- msg_user(msg) == LINK_PROTOCOL &&
- (msg_type(msg) == RESET_MSG ||
- msg_type(msg) == ACTIVATE_MSG) &&
- !msg_redundant_link(msg))
- n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
-
- if (tipc_node_blocked(n_ptr))
- goto unlock;
-
- /* Validate message sequence number info */
- seq_no = msg_seqno(msg);
- ackd = msg_ack(msg);
-
- /* Release acked messages */
- if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg)))
- tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
-
- released = 0;
- skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) {
- if (more(buf_seqno(skb1), ackd))
- break;
- __skb_unlink(skb1, &l_ptr->transmq);
- kfree_skb(skb1);
- released = 1;
- }
-
- /* Try sending any messages link endpoint has pending */
- if (unlikely(skb_queue_len(&l_ptr->backlogq)))
- tipc_link_push_packets(l_ptr);
-
- if (released && !skb_queue_empty(&l_ptr->wakeupq))
- link_prepare_wakeup(l_ptr);
-
- /* Process the incoming packet */
- if (unlikely(!link_working_working(l_ptr))) {
- if (msg_user(msg) == LINK_PROTOCOL) {
- tipc_link_proto_rcv(l_ptr, skb);
- link_retrieve_defq(l_ptr, &head);
- skb = NULL;
- goto unlock;
- }
-
- /* Traffic message. Conditionally activate link */
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
-
- if (link_working_working(l_ptr)) {
- /* Re-insert buffer in front of queue */
- __skb_queue_head(&head, skb);
- skb = NULL;
- goto unlock;
- }
- goto unlock;
- }
-
- /* Link is now in state WORKING_WORKING */
- if (unlikely(seq_no != l_ptr->rcv_nxt)) {
- link_handle_out_of_seq_msg(l_ptr, skb);
- link_retrieve_defq(l_ptr, &head);
- skb = NULL;
- goto unlock;
- }
- l_ptr->silent_intv_cnt = 0;
-
- /* Synchronize with parallel link if applicable */
- if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) {
- if (!link_synch(l_ptr))
- goto unlock;
- }
- l_ptr->rcv_nxt++;
- if (unlikely(!skb_queue_empty(&l_ptr->deferdq)))
- link_retrieve_defq(l_ptr, &head);
- if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
- l_ptr->stats.sent_acks++;
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0);
- }
- tipc_link_input(l_ptr, skb);
- skb = NULL;
-unlock:
- tipc_node_unlock(n_ptr);
- tipc_node_put(n_ptr);
-discard:
- if (unlikely(skb))
- kfree_skb(skb);
- }
-}
-
/* tipc_data_input - deliver data and name distr msgs to upper layer
*
* Consumes buffer if message is of right type
@@ -1138,16 +1056,16 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb)
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
case CONN_MANAGER:
- if (tipc_skb_queue_tail(&link->inputq, skb, dport)) {
- node->inputq = &link->inputq;
+ if (tipc_skb_queue_tail(link->inputq, skb, dport)) {
+ node->inputq = link->inputq;
node->action_flags |= TIPC_MSG_EVT;
}
return true;
case NAME_DISTRIBUTOR:
node->bclink.recv_permitted = true;
- node->namedq = &link->namedq;
- skb_queue_tail(&link->namedq, skb);
- if (skb_queue_len(&link->namedq) == 1)
+ node->namedq = link->namedq;
+ skb_queue_tail(link->namedq, skb);
+ if (skb_queue_len(link->namedq) == 1)
node->action_flags |= TIPC_NAMED_MSG_EVT;
return true;
case MSG_BUNDLER:
@@ -1174,13 +1092,10 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb)
struct sk_buff *iskb;
int pos = 0;
- if (likely(tipc_data_input(link, skb)))
- return;
-
switch (msg_user(msg)) {
case TUNNEL_PROTOCOL:
if (msg_dup(msg)) {
- link->flags |= LINK_SYNCHING;
+ link->exec_mode = TIPC_LINK_TUNNEL;
link->synch_point = msg_seqno(msg_get_wrapped(msg));
kfree_skb(skb);
break;
@@ -1215,6 +1130,110 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb)
};
}
+static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
+{
+ bool released = false;
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(&l->transmq, skb, tmp) {
+ if (more(buf_seqno(skb), acked))
+ break;
+ __skb_unlink(skb, &l->transmq);
+ kfree_skb(skb);
+ released = true;
+ }
+ return released;
+}
+
+/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
+ * @link: the link that should handle the message
+ * @skb: TIPC packet
+ * @xmitq: queue to place packets to be sent after this call
+ */
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *arrvq = &l->deferdq;
+ struct sk_buff *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno, rcv_nxt;
+ int rc = 0;
+
+ if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) {
+ if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0,
+ 0, 0, 0, xmitq);
+ return rc;
+ }
+
+ skb_queue_walk_safe(arrvq, skb, tmp) {
+ hdr = buf_msg(skb);
+
+ /* Verify and update link state */
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) {
+ __skb_dequeue(arrvq);
+ rc |= tipc_link_proto_rcv(l, skb, xmitq);
+ continue;
+ }
+
+ if (unlikely(!link_working(l))) {
+ rc |= tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq);
+ if (!link_working(l)) {
+ kfree_skb(__skb_dequeue(arrvq));
+ return rc;
+ }
+ }
+
+ l->silent_intv_cnt = 0;
+
+ /* Forward queues and wake up waiting users */
+ if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
+
+ /* Defer reception if there is a gap in the sequence */
+ seqno = msg_seqno(hdr);
+ rcv_nxt = l->rcv_nxt;
+ if (unlikely(less(rcv_nxt, seqno))) {
+ l->stats.deferred_recv++;
+ return rc;
+ }
+
+ __skb_dequeue(arrvq);
+
+ /* Drop if packet already received */
+ if (unlikely(more(rcv_nxt, seqno))) {
+ l->stats.duplicates++;
+ kfree_skb(skb);
+ return rc;
+ }
+
+ /* Synchronize with parallel link if applicable */
+ if (unlikely(l->exec_mode == TIPC_LINK_TUNNEL))
+ if (!msg_dup(hdr) && !link_synch(l)) {
+ kfree_skb(skb);
+ return rc;
+ }
+
+ /* Packet can be delivered */
+ l->rcv_nxt++;
+ l->stats.recv_info++;
+ if (unlikely(!tipc_data_input(l, skb)))
+ tipc_link_input(l, skb);
+
+ /* Ack at regular intervals */
+ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
+ l->rcv_unacked = 0;
+ l->stats.sent_acks++;
+ tipc_link_build_proto_msg(l, STATE_MSG,
+ 0, 0, 0, 0, xmitq);
+ }
+ }
+ return rc;
+}
+
/**
* tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
*
@@ -1255,235 +1274,85 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
}
/*
- * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet
+ * Send protocol message to the other endpoint.
*/
-static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
- struct sk_buff *buf)
+void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
+ u32 gap, u32 tolerance, u32 priority)
{
- u32 seq_no = buf_seqno(buf);
-
- if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
- tipc_link_proto_rcv(l_ptr, buf);
- return;
- }
-
- /* Record OOS packet arrival */
- l_ptr->silent_intv_cnt = 0;
+ struct sk_buff *skb = NULL;
+ struct sk_buff_head xmitq;
- /*
- * Discard packet if a duplicate; otherwise add it to deferred queue
- * and notify peer of gap as per protocol specification
- */
- if (less(seq_no, l_ptr->rcv_nxt)) {
- l_ptr->stats.duplicates++;
- kfree_skb(buf);
+ __skb_queue_head_init(&xmitq);
+ tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
+ tolerance, priority, &xmitq);
+ skb = __skb_dequeue(&xmitq);
+ if (!skb)
return;
- }
-
- if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) {
- l_ptr->stats.deferred_recv++;
- if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1)
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0);
- } else {
- l_ptr->stats.duplicates++;
- }
+ tipc_bearer_send(l->owner->net, l->bearer_id, skb, &l->media_addr);
+ l->rcv_unacked = 0;
+ kfree_skb(skb);
}
-/*
- * Send protocol message to the other endpoint.
+/* tipc_link_build_proto_msg: prepare link protocol message for transmission
*/
-void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
- u32 gap, u32 tolerance, u32 priority)
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ u16 rcvgap, int tolerance, int priority,
+ struct sk_buff_head *xmitq)
{
- struct sk_buff *buf = NULL;
- struct tipc_msg *msg = l_ptr->pmsg;
- u32 msg_size = sizeof(l_ptr->proto_msg);
- int r_flag;
- u16 last_rcv;
-
- /* Don't send protocol message during link failover */
- if (l_ptr->flags & LINK_FAILINGOVER)
- return;
-
- /* Abort non-RESET send if communication with node is prohibited */
- if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG))
+ struct sk_buff *skb = NULL;
+ struct tipc_msg *hdr = l->pmsg;
+ u16 snd_nxt = l->snd_nxt;
+ u16 rcv_nxt = l->rcv_nxt;
+ u16 rcv_last = rcv_nxt - 1;
+ int node_up = l->owner->bclink.recv_permitted;
+
+ /* Don't send protocol message during reset or link failover */
+ if (l->exec_mode == TIPC_LINK_BLOCKED)
return;
- /* Create protocol message with "out-of-sequence" sequence number */
- msg_set_type(msg, msg_typ);
- msg_set_net_plane(msg, l_ptr->net_plane);
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net));
+ msg_set_type(hdr, mtyp);
+ msg_set_net_plane(hdr, l->net_plane);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net));
+ msg_set_link_tolerance(hdr, tolerance);
+ msg_set_linkprio(hdr, priority);
+ msg_set_redundant_link(hdr, node_up);
+ msg_set_seq_gap(hdr, 0);
- if (msg_typ == STATE_MSG) {
- u16 next_sent = l_ptr->snd_nxt;
+ /* Compatibility: created msg must not be in sequence with pkt flow */
+ msg_set_seqno(hdr, snd_nxt + U16_MAX / 2);
- if (!tipc_link_is_up(l_ptr))
+ if (mtyp == STATE_MSG) {
+ if (!tipc_link_is_up(l))
return;
- msg_set_next_sent(msg, next_sent);
- if (!skb_queue_empty(&l_ptr->deferdq)) {
- last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq));
- gap = mod(last_rcv - l_ptr->rcv_nxt);
+ msg_set_next_sent(hdr, snd_nxt);
+
+ /* Override rcvgap if there are packets in deferred queue */
+ if (!skb_queue_empty(&l->deferdq))
+ rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt;
+ if (rcvgap) {
+ msg_set_seq_gap(hdr, rcvgap);
+ l->stats.sent_nacks++;
}
- msg_set_seq_gap(msg, gap);
- if (gap)
- l_ptr->stats.sent_nacks++;
- msg_set_link_tolerance(msg, tolerance);
- msg_set_linkprio(msg, priority);
- msg_set_max_pkt(msg, l_ptr->mtu);
- msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
- msg_set_probe(msg, probe_msg != 0);
- if (probe_msg)
- l_ptr->stats.sent_probes++;
- l_ptr->stats.sent_states++;
- } else { /* RESET_MSG or ACTIVATE_MSG */
- msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1));
- msg_set_seq_gap(msg, 0);
- msg_set_next_sent(msg, 1);
- msg_set_probe(msg, 0);
- msg_set_link_tolerance(msg, l_ptr->tolerance);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_max_pkt(msg, l_ptr->advertised_mtu);
+ msg_set_ack(hdr, rcv_last);
+ msg_set_probe(hdr, probe);
+ if (probe)
+ l->stats.sent_probes++;
+ l->stats.sent_states++;
+ } else {
+ /* RESET_MSG or ACTIVATE_MSG */
+ msg_set_max_pkt(hdr, l->advertised_mtu);
+ msg_set_ack(hdr, l->failover_checkpt - 1);
+ msg_set_next_sent(hdr, 1);
}
-
- r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr));
- msg_set_redundant_link(msg, r_flag);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_size(msg, msg_size);
-
- msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2)));
-
- buf = tipc_buf_acquire(msg_size);
- if (!buf)
+ skb = tipc_buf_acquire(msg_size(hdr));
+ if (!skb)
return;
-
- skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
- buf->priority = TC_PRIO_CONTROL;
- tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf,
- &l_ptr->media_addr);
- l_ptr->rcv_unacked = 0;
- kfree_skb(buf);
+ skb_copy_to_linear_data(skb, hdr, msg_size(hdr));
+ skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_head(xmitq, skb);
}
-/*
- * Receive protocol message :
- * Note that network plane id propagates through the network, and may
- * change at any time. The node with lowest address rules
- */
-static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
- struct sk_buff *buf)
-{
- u32 rec_gap = 0;
- u32 msg_tol;
- struct tipc_msg *msg = buf_msg(buf);
-
- if (l_ptr->flags & LINK_FAILINGOVER)
- goto exit;
-
- if (l_ptr->net_plane != msg_net_plane(msg))
- if (link_own_addr(l_ptr) > msg_prevnode(msg))
- l_ptr->net_plane = msg_net_plane(msg);
-
- switch (msg_type(msg)) {
-
- case RESET_MSG:
- if (!link_working_unknown(l_ptr) &&
- (l_ptr->peer_session != INVALID_SESSION)) {
- if (less_eq(msg_session(msg), l_ptr->peer_session))
- break; /* duplicate or old reset: ignore */
- }
-
- if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
- link_working_unknown(l_ptr))) {
- /*
- * peer has lost contact -- don't allow peer's links
- * to reactivate before we recognize loss & clean up
- */
- l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
- }
-
- link_state_event(l_ptr, RESET_MSG);
-
- /* fall thru' */
- case ACTIVATE_MSG:
- /* Update link settings according other endpoint's values */
- strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg));
-
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol > l_ptr->tolerance)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) > l_ptr->priority)
- l_ptr->priority = msg_linkprio(msg);
-
- if (l_ptr->mtu > msg_max_pkt(msg))
- l_ptr->mtu = msg_max_pkt(msg);
-
- /* Synchronize broadcast link info, if not done previously */
- if (!tipc_node_is_up(l_ptr->owner)) {
- l_ptr->owner->bclink.last_sent =
- l_ptr->owner->bclink.last_in =
- msg_last_bcast(msg);
- l_ptr->owner->bclink.oos_state = 0;
- }
-
- l_ptr->peer_session = msg_session(msg);
- l_ptr->peer_bearer_id = msg_bearer_id(msg);
-
- if (msg_type(msg) == ACTIVATE_MSG)
- link_state_event(l_ptr, ACTIVATE_MSG);
- break;
- case STATE_MSG:
-
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) &&
- (msg_linkprio(msg) != l_ptr->priority)) {
- pr_debug("%s<%s>, priority change %u->%u\n",
- link_rst_msg, l_ptr->name,
- l_ptr->priority, msg_linkprio(msg));
- l_ptr->priority = msg_linkprio(msg);
- tipc_link_reset(l_ptr); /* Enforce change to take effect */
- break;
- }
-
- /* Record reception; force mismatch at next timeout: */
- l_ptr->silent_intv_cnt = 0;
-
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
- l_ptr->stats.recv_states++;
- if (link_reset_unknown(l_ptr))
- break;
-
- if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg)))
- rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt);
-
- if (msg_probe(msg))
- l_ptr->stats.recv_probes++;
-
- /* Protocol message before retransmits, reduce loss risk */
- if (l_ptr->owner->bclink.recv_permitted)
- tipc_bclink_update_link_state(l_ptr->owner,
- msg_last_bcast(msg));
-
- if (rec_gap || (msg_probe(msg))) {
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0,
- rec_gap, 0, 0);
- }
- if (msg_seq_gap(msg)) {
- l_ptr->stats.recv_nacks++;
- tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq),
- msg_seq_gap(msg));
- }
- break;
- }
-exit:
- kfree_skb(buf);
-}
-
-
/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to
* a different bearer. Owner node is locked.
*/
@@ -1496,7 +1365,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr,
struct sk_buff *skb;
u32 length = msg_size(msg);
- tunnel = l_ptr->owner->active_links[selector & 1];
+ tunnel = node_active_link(l_ptr->owner, selector & 1);
if (!tipc_link_is_up(tunnel)) {
pr_warn("%stunnel link no longer available\n", link_co_err);
return;
@@ -1522,7 +1391,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr,
void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
{
int msgcount;
- struct tipc_link *tunnel = l_ptr->owner->active_links[0];
+ struct tipc_link *tunnel = node_active_link(l_ptr->owner, 0);
struct tipc_msg tunnel_hdr;
struct sk_buff *skb;
int split_bundles;
@@ -1556,8 +1425,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
return;
}
- split_bundles = (l_ptr->owner->active_links[0] !=
- l_ptr->owner->active_links[1]);
+ split_bundles = (node_active_link(l_ptr->owner, 0) !=
+ node_active_link(l_ptr->owner, 0));
skb_queue_walk(&l_ptr->transmq, skb) {
struct tipc_msg *msg = buf_msg(skb);
@@ -1660,7 +1529,7 @@ static bool tipc_link_failover_rcv(struct tipc_link *link,
if (bearer_id == link->bearer_id)
goto exit;
- pl = link->owner->links[bearer_id];
+ pl = link->owner->links[bearer_id].link;
if (pl && tipc_link_is_up(pl))
tipc_link_reset(pl);
@@ -1691,22 +1560,100 @@ static bool tipc_link_failover_rcv(struct tipc_link *link,
}
exit:
if (!link->failover_pkts && pl)
- pl->flags &= ~LINK_FAILINGOVER;
+ pl->exec_mode = TIPC_LINK_OPEN;
kfree_skb(*skb);
*skb = iskb;
return *skb;
}
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol)
+/* tipc_link_proto_rcv(): receive link level protocol message :
+ * Note that network plane id propagates through the network, and may
+ * change at any time. The node with lowest numerical id determines
+ * network plane
+ */
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u16 rcvgap = 0;
+ u16 nacked_gap = msg_seq_gap(hdr);
+ u16 peers_snd_nxt = msg_next_sent(hdr);
+ u16 peers_tol = msg_link_tolerance(hdr);
+ u16 peers_prio = msg_linkprio(hdr);
+ char *if_name;
+ int rc = 0;
- if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL))
- return;
+ if (l->exec_mode == TIPC_LINK_BLOCKED)
+ goto exit;
+
+ if (link_own_addr(l) > msg_prevnode(hdr))
+ l->net_plane = msg_net_plane(hdr);
+
+ switch (msg_type(hdr)) {
+ case RESET_MSG:
+
+ /* Ignore duplicate RESET with old session number */
+ if ((less_eq(msg_session(hdr), l->peer_session)) &&
+ (l->peer_session != WILDCARD_SESSION))
+ break;
+ /* fall thru' */
+ case ACTIVATE_MSG:
+
+ /* Complete own link name with peer's interface name */
+ if_name = strrchr(l->name, ':') + 1;
+ if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
+ break;
+ if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
+ break;
+ strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
+
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+ l->tolerance = peers_tol;
- l_ptr->tolerance = tol;
- l_ptr->keepalive_intv = msecs_to_jiffies(intv);
- l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv));
+ /* Update own priority if peer's priority is higher */
+ if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ l->priority = peers_prio;
+
+ l->peer_session = msg_session(hdr);
+ l->peer_bearer_id = msg_bearer_id(hdr);
+ rc = tipc_link_fsm_evt(l, msg_type(hdr), xmitq);
+ if (l->mtu > msg_max_pkt(hdr))
+ l->mtu = msg_max_pkt(hdr);
+ break;
+ case STATE_MSG:
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+ l->tolerance = peers_tol;
+
+ l->silent_intv_cnt = 0;
+ l->stats.recv_states++;
+ if (msg_probe(hdr))
+ l->stats.recv_probes++;
+ rc = tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq);
+ if (!tipc_link_is_up(l))
+ break;
+
+ /* Has peer sent packets we haven't received yet ? */
+ if (more(peers_snd_nxt, l->rcv_nxt))
+ rcvgap = peers_snd_nxt - l->rcv_nxt;
+ if (rcvgap || (msg_probe(hdr)))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
+ 0, l->mtu, xmitq);
+ tipc_link_release_pkts(l, msg_ack(hdr));
+
+ /* If NACK, retransmit will now start at right position */
+ if (nacked_gap) {
+ rc |= tipc_link_retransm(l, nacked_gap, xmitq);
+ l->stats.recv_nacks++;
+ }
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
+exit:
+ kfree_skb(skb);
+ return rc;
}
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
@@ -1743,7 +1690,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net,
list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
tipc_node_lock(n_ptr);
for (i = 0; i < MAX_BEARERS; i++) {
- l_ptr = n_ptr->links[i];
+ l_ptr = n_ptr->links[i].link;
if (l_ptr && !strcmp(l_ptr->name, link_name)) {
*bearer_id = i;
found_node = n_ptr;
@@ -1770,27 +1717,28 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
l_ptr->stats.recv_info = l_ptr->rcv_nxt;
}
-static void link_print(struct tipc_link *l_ptr, const char *str)
+static void link_print(struct tipc_link *l, const char *str)
{
- struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id);
- struct tipc_bearer *b_ptr;
-
- rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]);
- if (b_ptr)
- pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
- rcu_read_unlock();
-
- if (link_working_unknown(l_ptr))
- pr_cont(":WU\n");
- else if (link_reset_reset(l_ptr))
- pr_cont(":RR\n");
- else if (link_reset_unknown(l_ptr))
- pr_cont(":RU\n");
- else if (link_working_working(l_ptr))
- pr_cont(":WW\n");
+ struct sk_buff *hskb = skb_peek(&l->transmq);
+ u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
+ u16 tail = l->snd_nxt - 1;
+
+ pr_info("%s Link <%s>:", str, l->name);
+
+ if (link_probing(l))
+ pr_cont(":P\n");
+ else if (link_establishing(l))
+ pr_cont(":E\n");
+ else if (link_resetting(l))
+ pr_cont(":R\n");
+ else if (link_working(l))
+ pr_cont(":W\n");
else
pr_cont("\n");
+
+ pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
+ skb_queue_len(&l->transmq), head, tail,
+ skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
}
/* Parse and validate nested (link) properties valid for media, bearer and link
@@ -1865,7 +1813,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
res = -EINVAL;
goto out;
@@ -1885,7 +1833,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
u32 tol;
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- link_set_supervision_props(link, tol);
+ link->tolerance = tol;
tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0);
}
if (props[TIPC_NLA_PROP_PRIO]) {
@@ -2055,10 +2003,11 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
for (i = *prev_link; i < MAX_BEARERS; i++) {
*prev_link = i;
- if (!node->links[i])
+ if (!node->links[i].link)
continue;
- err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI);
+ err = __tipc_nl_add_link(net, msg,
+ node->links[i].link, NLM_F_MULTI);
if (err)
return err;
}
@@ -2172,7 +2121,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
tipc_node_unlock(node);
nlmsg_free(msg.skb);
@@ -2227,7 +2176,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
tipc_node_unlock(node);
return -EINVAL;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index ae0a0ea572f2..37cfd7d7bf7d 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -49,19 +49,21 @@
*/
#define INVALID_LINK_SEQ 0x10000
-/* Link working states
+
+/* Link endpoint receive states
*/
-#define WORKING_WORKING 560810u
-#define WORKING_UNKNOWN 560811u
-#define RESET_UNKNOWN 560812u
-#define RESET_RESET 560813u
+enum {
+ TIPC_LINK_OPEN,
+ TIPC_LINK_BLOCKED,
+ TIPC_LINK_TUNNEL
+};
-/* Link endpoint execution states
+/* Events returned from link at packet reception or at timeout
*/
-#define LINK_STARTED 0x0001
-#define LINK_STOPPED 0x0002
-#define LINK_SYNCHING 0x0004
-#define LINK_FAILINGOVER 0x0008
+enum {
+ TIPC_LINK_UP_EVT = 1,
+ TIPC_LINK_DOWN_EVT = (1 << 1)
+};
/* Starting value for maximum packet size negotiation on unicast links
* (unless bearer MTU is less)
@@ -106,7 +108,6 @@ struct tipc_stats {
* @timer: link timer
* @owner: pointer to peer node
* @refcnt: reference counter for permanent references (owner node & timer)
- * @flags: execution state flags for link endpoint instance
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
@@ -119,6 +120,7 @@ struct tipc_stats {
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
+ * @exec_mode: transmit/receive mode for link endpoint instance
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
@@ -144,12 +146,9 @@ struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
struct tipc_media_addr media_addr;
- struct timer_list timer;
struct tipc_node *owner;
- struct kref ref;
/* Management and link supervision data */
- unsigned int flags;
u32 peer_session;
u32 peer_bearer_id;
u32 bearer_id;
@@ -165,6 +164,7 @@ struct tipc_link {
struct tipc_msg *pmsg;
u32 priority;
char net_plane;
+ u8 exec_mode;
u16 synch_point;
/* Failover */
@@ -192,8 +192,8 @@ struct tipc_link {
u16 rcv_nxt;
u32 rcv_unacked;
struct sk_buff_head deferdq;
- struct sk_buff_head inputq;
- struct sk_buff_head namedq;
+ struct sk_buff_head *inputq;
+ struct sk_buff_head *namedq;
/* Congestion handling */
struct sk_buff_head wakeupq;
@@ -207,9 +207,11 @@ struct tipc_link {
struct tipc_port;
-struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr);
+struct tipc_link *tipc_link_create(struct tipc_node *n,
+ struct tipc_bearer *b,
+ const struct tipc_media_addr *maddr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq);
void tipc_link_delete(struct tipc_link *link);
void tipc_link_delete_list(struct net *net, unsigned int bearer_id);
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
@@ -221,12 +223,11 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr);
void tipc_link_purge_backlog(struct tipc_link *l);
void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
- u32 selector);
-int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
- u32 selector);
+void tipc_link_activate(struct tipc_link *link);
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
struct sk_buff_head *list);
+int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq);
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
u32 gap, u32 tolerance, u32 priority);
void tipc_link_push_packets(struct tipc_link *l_ptr);
@@ -243,33 +244,12 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
void link_prepare_wakeup(struct tipc_link *l);
-
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
static inline u32 link_own_addr(struct tipc_link *l)
{
return msg_prevnode(l->pmsg);
}
-/*
- * Link status checking routines
- */
-static inline int link_working_working(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_WORKING;
-}
-
-static inline int link_working_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_UNKNOWN;
-}
-
-static inline int link_reset_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_UNKNOWN;
-}
-
-static inline int link_reset_reset(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_RESET;
-}
-
#endif
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 19c45fb66238..2f1563b47e24 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -38,6 +38,7 @@
#define _TIPC_MSG_H
#include <linux/tipc.h>
+#include "core.h"
/*
* Constants and routines used to read and write TIPC payload message headers
@@ -658,12 +659,12 @@ static inline void msg_set_link_selector(struct tipc_msg *m, u32 n)
/*
* Word 5
*/
-static inline u32 msg_session(struct tipc_msg *m)
+static inline u16 msg_session(struct tipc_msg *m)
{
return msg_bits(m, 5, 16, 0xffff);
}
-static inline void msg_set_session(struct tipc_msg *m, u32 n)
+static inline void msg_set_session(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 5, 16, 0xffff, n);
}
@@ -766,6 +767,22 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
+static inline bool msg_is_traffic(struct tipc_msg *m)
+{
+ if (likely(msg_user(m) != LINK_PROTOCOL))
+ return true;
+ if ((msg_type(m) == RESET_MSG) || (msg_type(m) == ACTIVATE_MSG))
+ return false;
+ return true;
+}
+
+static inline bool msg_peer_is_up(struct tipc_msg *m)
+{
+ if (likely(msg_is_traffic(m)))
+ return false;
+ return msg_redundant_link(m);
+}
+
struct sk_buff *tipc_buf_acquire(u32 size);
bool tipc_msg_validate(struct sk_buff *skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
@@ -879,4 +896,36 @@ static inline bool tipc_skb_queue_tail(struct sk_buff_head *list,
return rv;
}
+/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
+ * @list: list to be appended to
+ * @skb: buffer to add
+ * Returns true if queue should treated further, otherwise false
+ */
+static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list,
+ struct sk_buff *skb)
+{
+ struct sk_buff *_skb, *tmp;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u16 seqno = msg_seqno(hdr);
+
+ if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) {
+ __skb_queue_head(list, skb);
+ return true;
+ }
+ if (likely(less(seqno, buf_seqno(skb_peek(list))))) {
+ __skb_queue_head(list, skb);
+ return true;
+ }
+ if (!more(seqno, buf_seqno(skb_peek_tail(list)))) {
+ skb_queue_walk_safe(list, _skb, tmp) {
+ if (likely(less(seqno, buf_seqno(_skb)))) {
+ __skb_queue_before(list, _skb, skb);
+ return true;
+ }
+ }
+ }
+ __skb_queue_tail(list, skb);
+ return false;
+}
+
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 41e7b7e4dda0..e6018b7eb197 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -96,13 +96,13 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb)
dnode = node->addr;
if (in_own_node(net, dnode))
continue;
- if (!tipc_node_active_links(node))
+ if (!tipc_node_is_up(node))
continue;
oskb = pskb_copy(skb, GFP_ATOMIC);
if (!oskb)
break;
msg_set_destnode(buf_msg(oskb), dnode);
- tipc_link_xmit_skb(net, oskb, dnode, dnode);
+ tipc_node_xmit_skb(net, oskb, dnode, dnode);
}
rcu_read_unlock();
@@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
&tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
rcu_read_unlock();
- tipc_link_xmit(net, &head, dnode, dnode);
+ tipc_node_xmit(net, &head, dnode, dnode);
}
static void tipc_publ_subscribe(struct net *net, struct publication *publ,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0b1d61a5f853..e92f84afbf95 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,10 +40,13 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "discover.h"
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
static void tipc_node_delete(struct tipc_node *node);
+static void tipc_node_timeout(unsigned long data);
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
struct tipc_sock_conn {
u32 port;
@@ -132,6 +135,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->publ_list);
INIT_LIST_HEAD(&n_ptr->conn_sks);
+ skb_queue_head_init(&n_ptr->bclink.namedq);
__skb_queue_head_init(&n_ptr->bclink.deferdq);
hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -139,14 +143,32 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
break;
}
list_add_tail_rcu(&n_ptr->list, &temp_node->list);
- n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
+ n_ptr->state = SELF_DOWN_PEER_LEAVING;
n_ptr->signature = INVALID_NODE_SIG;
+ n_ptr->active_links[0] = INVALID_BEARER_ID;
+ n_ptr->active_links[1] = INVALID_BEARER_ID;
tipc_node_get(n_ptr);
+ setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
+ n_ptr->keepalive_intv = U32_MAX;
exit:
spin_unlock_bh(&tn->node_list_lock);
return n_ptr;
}
+static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
+{
+ unsigned long tol = l->tolerance;
+ unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
+ unsigned long keepalive_intv = msecs_to_jiffies(intv);
+
+ /* Link with lowest tolerance determines timer interval */
+ if (keepalive_intv < n->keepalive_intv)
+ n->keepalive_intv = keepalive_intv;
+
+ /* Ensure link's abort limit corresponds to current interval */
+ l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv);
+}
+
static void tipc_node_delete(struct tipc_node *node)
{
list_del_rcu(&node->list);
@@ -160,8 +182,11 @@ void tipc_node_stop(struct net *net)
struct tipc_node *node, *t_node;
spin_lock_bh(&tn->node_list_lock);
- list_for_each_entry_safe(node, t_node, &tn->node_list, list)
+ list_for_each_entry_safe(node, t_node, &tn->node_list, list) {
+ if (del_timer(&node->timer))
+ tipc_node_put(node);
tipc_node_put(node);
+ }
spin_unlock_bh(&tn->node_list_lock);
}
@@ -219,131 +244,170 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
tipc_node_put(node);
}
+/* tipc_node_timeout - handle expiration of node timer
+ */
+static void tipc_node_timeout(unsigned long data)
+{
+ struct tipc_node *n = (struct tipc_node *)data;
+ struct sk_buff_head xmitq;
+ struct tipc_link *l;
+ struct tipc_media_addr *maddr;
+ int bearer_id;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+
+ for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+ tipc_node_lock(n);
+ l = n->links[bearer_id].link;
+ if (l) {
+ /* Link tolerance may change asynchronously: */
+ tipc_node_calculate_timer(n, l);
+ rc = tipc_link_timeout(l, &xmitq);
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_link_reset(l);
+ }
+ tipc_node_unlock(n);
+ maddr = &n->links[bearer_id].maddr;
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ }
+ if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ tipc_node_get(n);
+ tipc_node_put(n);
+}
+
/**
* tipc_node_link_up - handle addition of link
*
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+void tipc_node_link_up(struct tipc_node *n, int bearer_id)
{
- struct tipc_link **active = &n_ptr->active_links[0];
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ struct tipc_link_entry *links = n->links;
+ struct tipc_link *l = n->links[bearer_id].link;
- n_ptr->working_links++;
- n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
- n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
+ /* Leave room for tunnel header when returning 'mtu' to users: */
+ links[bearer_id].mtu = l->mtu - INT_H_SIZE;
+
+ n->working_links++;
+ n->action_flags |= TIPC_NOTIFY_LINK_UP;
+ n->link_id = l->peer_bearer_id << 16 | l->bearer_id;
pr_debug("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
+ l->name, l->net_plane);
- if (!active[0]) {
- active[0] = active[1] = l_ptr;
- node_established_contact(n_ptr);
- goto exit;
+ /* No active links ? => take both active slots */
+ if (*slot0 < 0) {
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
+ node_established_contact(n);
+ return;
}
- if (l_ptr->priority < active[0]->priority) {
- pr_debug("New link <%s> becomes standby\n", l_ptr->name);
- goto exit;
+
+ /* Lower prio than current active ? => no slot */
+ if (l->priority < links[*slot0].link->priority) {
+ pr_debug("New link <%s> becomes standby\n", l->name);
+ return;
}
- tipc_link_dup_queue_xmit(active[0], l_ptr);
- if (l_ptr->priority == active[0]->priority) {
- active[0] = l_ptr;
- goto exit;
+ tipc_link_dup_queue_xmit(links[*slot0].link, l);
+
+ /* Same prio as current active ? => take one slot */
+ if (l->priority == links[*slot0].link->priority) {
+ *slot0 = bearer_id;
+ return;
}
- pr_debug("Old link <%s> becomes standby\n", active[0]->name);
- if (active[1] != active[0])
- pr_debug("Old link <%s> becomes standby\n", active[1]->name);
- active[0] = active[1] = l_ptr;
-exit:
- /* Leave room for changeover header when returning 'mtu' to users: */
- n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE;
- n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE;
+
+ /* Higher prio than current active => take both active slots */
+ pr_debug("Old link <%s> now standby\n", links[*slot0].link->name);
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
}
/**
- * node_select_active_links - select active link
+ * tipc_node_link_down - handle loss of link
*/
-static void node_select_active_links(struct tipc_node *n_ptr)
+void tipc_node_link_down(struct tipc_node *n, int bearer_id)
{
- struct tipc_link **active = &n_ptr->active_links[0];
- u32 i;
- u32 highest_prio = 0;
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ int i, highest = 0;
+ struct tipc_link *l, *_l;
- active[0] = active[1] = NULL;
+ l = n->links[bearer_id].link;
+ n->working_links--;
+ n->action_flags |= TIPC_NOTIFY_LINK_DOWN;
+ n->link_id = l->peer_bearer_id << 16 | l->bearer_id;
- for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
+ pr_debug("Lost link <%s> on network plane %c\n",
+ l->name, l->net_plane);
- if (!l_ptr || !tipc_link_is_up(l_ptr) ||
- (l_ptr->priority < highest_prio))
+ /* Select new active link if any available */
+ *slot0 = INVALID_BEARER_ID;
+ *slot1 = INVALID_BEARER_ID;
+ for (i = 0; i < MAX_BEARERS; i++) {
+ _l = n->links[i].link;
+ if (!_l || !tipc_link_is_up(_l))
+ continue;
+ if (_l->priority < highest)
+ continue;
+ if (_l->priority > highest) {
+ highest = _l->priority;
+ *slot0 = i;
+ *slot1 = i;
continue;
-
- if (l_ptr->priority > highest_prio) {
- highest_prio = l_ptr->priority;
- active[0] = active[1] = l_ptr;
- } else {
- active[1] = l_ptr;
}
+ *slot1 = i;
}
+ if (tipc_node_is_up(n))
+ tipc_link_failover_send_queue(l);
+ else
+ node_lost_contact(n);
}
-/**
- * tipc_node_link_down - handle loss of link
- */
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+bool tipc_node_is_up(struct tipc_node *n)
{
- struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
- struct tipc_link **active;
-
- n_ptr->working_links--;
- n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN;
- n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
-
- if (!tipc_link_is_active(l_ptr)) {
- pr_debug("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
- return;
- }
- pr_debug("Lost link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
-
- active = &n_ptr->active_links[0];
- if (active[0] == l_ptr)
- active[0] = active[1];
- if (active[1] == l_ptr)
- active[1] = active[0];
- if (active[0] == l_ptr)
- node_select_active_links(n_ptr);
- if (tipc_node_is_up(n_ptr))
- tipc_link_failover_send_queue(l_ptr);
- else
- node_lost_contact(n_ptr);
-
- /* Leave room for changeover header when returning 'mtu' to users: */
- if (active[0]) {
- n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE;
- n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE;
- return;
- }
- /* Loopback link went down? No fragmentation needed from now on. */
- if (n_ptr->addr == tn->own_addr) {
- n_ptr->act_mtus[0] = MAX_MSG_SIZE;
- n_ptr->act_mtus[1] = MAX_MSG_SIZE;
- }
+ return n->active_links[0] != INVALID_BEARER_ID;
}
-int tipc_node_active_links(struct tipc_node *n_ptr)
+void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *b,
+ bool *link_up, bool *addr_match,
+ struct tipc_media_addr *maddr)
{
- return n_ptr->active_links[0] != NULL;
+ struct tipc_link *l = n->links[b->identity].link;
+ struct tipc_media_addr *curr = &n->links[b->identity].maddr;
+
+ *link_up = l && tipc_link_is_up(l);
+ *addr_match = l && !memcmp(curr, maddr, sizeof(*maddr));
}
-int tipc_node_is_up(struct tipc_node *n_ptr)
+bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b,
+ struct tipc_media_addr *maddr)
{
- return tipc_node_active_links(n_ptr);
+ struct tipc_link *l = n->links[b->identity].link;
+ struct tipc_media_addr *curr = &n->links[b->identity].maddr;
+ struct sk_buff_head *inputq = &n->links[b->identity].inputq;
+
+ if (!l) {
+ l = tipc_link_create(n, b, maddr, inputq, &n->bclink.namedq);
+ if (!l)
+ return false;
+ tipc_node_calculate_timer(n, l);
+ if (n->link_cnt == 1) {
+ if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ tipc_node_get(n);
+ }
+ }
+ memcpy(&l->media_addr, maddr, sizeof(*maddr));
+ memcpy(curr, maddr, sizeof(*maddr));
+ tipc_link_reset(l);
+ return true;
}
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- n_ptr->links[l_ptr->bearer_id] = l_ptr;
+ n_ptr->links[l_ptr->bearer_id].link = l_ptr;
n_ptr->link_cnt++;
}
@@ -352,15 +416,151 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
int i;
for (i = 0; i < MAX_BEARERS; i++) {
- if (l_ptr != n_ptr->links[i])
+ if (l_ptr != n_ptr->links[i].link)
continue;
- n_ptr->links[i] = NULL;
+ n_ptr->links[i].link = NULL;
n_ptr->link_cnt--;
}
}
+/* tipc_node_fsm_evt - node finite state machine
+ * Determines when contact is allowed with peer node
+ */
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
+{
+ int state = n->state;
+
+ switch (state) {
+ case SELF_DOWN_PEER_DOWN:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_COMING;
+ break;
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_COMING_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ case SELF_UP_PEER_UP:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ case SELF_DOWN_PEER_LEAVING:
+ switch (evt) {
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case SELF_LOST_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ case SELF_UP_PEER_COMING:
+ switch (evt) {
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ case SELF_COMING_PEER_UP:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ case SELF_LEAVING_PEER_DOWN:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ default:
+ pr_err("Unknown node fsm evt %x/%x\n", state, evt);
+ }
+ break;
+ default:
+ pr_err("Unknown node fsm state %x\n", state);
+ break;
+ }
+
+ n->state = state;
+}
+
+bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_link *l,
+ struct tipc_msg *hdr)
+{
+ int state = n->state;
+
+ if (likely(state == SELF_UP_PEER_UP))
+ return true;
+
+ if (state == SELF_DOWN_PEER_DOWN)
+ return true;
+
+ if (state == SELF_UP_PEER_COMING) {
+ /* If not traffic msg, peer may still be ESTABLISHING */
+ if (tipc_link_is_up(l) && msg_is_traffic(hdr))
+ tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT);
+ return true;
+ }
+
+ if (state == SELF_COMING_PEER_UP)
+ return true;
+
+ if (state == SELF_LEAVING_PEER_DOWN)
+ return false;
+
+ if (state == SELF_DOWN_PEER_LEAVING) {
+ if (msg_peer_is_up(hdr))
+ return false;
+ tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ return true;
+ }
+ return false;
+}
+
static void node_established_contact(struct tipc_node *n_ptr)
{
+ tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
n_ptr->bclink.oos_state = 0;
n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
@@ -396,21 +596,18 @@ static void node_lost_contact(struct tipc_node *n_ptr)
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
+ struct tipc_link *l_ptr = n_ptr->links[i].link;
if (!l_ptr)
continue;
- l_ptr->flags &= ~LINK_FAILINGOVER;
+ l_ptr->exec_mode = TIPC_LINK_OPEN;
l_ptr->failover_checkpt = 0;
l_ptr->failover_pkts = 0;
kfree_skb(l_ptr->failover_skb);
l_ptr->failover_skb = NULL;
tipc_link_reset_fragments(l_ptr);
}
-
- n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
-
/* Prevent re-contact with node until cleanup is done */
- n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
+ tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
/* Notify publications from this node */
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
@@ -453,7 +650,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
goto exit;
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (link) {
strncpy(linkname, link->name, len);
err = 0;
@@ -559,6 +756,160 @@ msg_full:
return -EMSGSIZE;
}
+static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel,
+ int *bearer_id,
+ struct tipc_media_addr **maddr)
+{
+ int id = n->active_links[sel & 1];
+
+ if (unlikely(id < 0))
+ return NULL;
+
+ *bearer_id = id;
+ *maddr = &n->links[id].maddr;
+ return n->links[id].link;
+}
+
+/**
+ * tipc_node_xmit() is the general link level function for message sending
+ * @net: the applicable net namespace
+ * @list: chain of buffers containing message
+ * @dnode: address of destination node
+ * @selector: a number used for deterministic link selection
+ * Consumes the buffer chain, except when returning -ELINKCONG
+ * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ */
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+ u32 dnode, int selector)
+{
+ struct tipc_link *l = NULL;
+ struct tipc_node *n;
+ struct sk_buff_head xmitq;
+ struct tipc_media_addr *maddr;
+ int bearer_id;
+ int rc = -EHOSTUNREACH;
+
+ __skb_queue_head_init(&xmitq);
+ n = tipc_node_find(net, dnode);
+ if (likely(n)) {
+ tipc_node_lock(n);
+ l = tipc_node_select_link(n, selector, &bearer_id, &maddr);
+ if (likely(l))
+ rc = tipc_link_xmit(l, list, &xmitq);
+ if (unlikely(rc == -ENOBUFS))
+ tipc_link_reset(l);
+ tipc_node_unlock(n);
+ tipc_node_put(n);
+ }
+ if (likely(!rc)) {
+ tipc_bearer_xmit(net, bearer_id, &xmitq, maddr);
+ return 0;
+ }
+ if (likely(in_own_node(net, dnode))) {
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
+ return rc;
+}
+
+/* tipc_node_xmit_skb(): send single buffer to destination
+ * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE
+ * messages, which will not be rejected
+ * The only exception is datagram messages rerouted after secondary
+ * lookup, which are rare and safe to dispose of anyway.
+ * TODO: Return real return value, and let callers use
+ * tipc_wait_for_sendpkt() where applicable
+ */
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
+ u32 selector)
+{
+ struct sk_buff_head head;
+ int rc;
+
+ skb_queue_head_init(&head);
+ __skb_queue_tail(&head, skb);
+ rc = tipc_node_xmit(net, &head, dnode, selector);
+ if (rc == -ELINKCONG)
+ kfree_skb(skb);
+ return 0;
+}
+
+/**
+ * tipc_rcv - process TIPC packets/messages arriving from off-node
+ * @net: the applicable net namespace
+ * @skb: TIPC packet
+ * @bearer: pointer to bearer message arrived on
+ *
+ * Invoked with no locks held. Bearer pointer must point to a valid bearer
+ * structure (i.e. cannot be NULL), but bearer can be inactive.
+ */
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_node *n;
+ struct tipc_link *l;
+ struct tipc_msg *hdr;
+ struct tipc_media_addr *maddr;
+ int bearer_id = b->identity;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+
+ /* Ensure message is well-formed */
+ if (unlikely(!tipc_msg_validate(skb)))
+ goto discard;
+
+ /* Handle arrival of a non-unicast link packet */
+ hdr = buf_msg(skb);
+ if (unlikely(msg_non_seq(hdr))) {
+ if (msg_user(hdr) == LINK_CONFIG)
+ tipc_disc_rcv(net, skb, b);
+ else
+ tipc_bclink_rcv(net, skb);
+ return;
+ }
+
+ /* Locate neighboring node that sent packet */
+ n = tipc_node_find(net, msg_prevnode(hdr));
+ if (unlikely(!n))
+ goto discard;
+ tipc_node_lock(n);
+
+ /* Locate link endpoint that should handle packet */
+ l = n->links[bearer_id].link;
+ if (unlikely(!l))
+ goto unlock;
+
+ /* Is reception of this packet permitted at the moment ? */
+ if (unlikely(n->state != SELF_UP_PEER_UP))
+ if (!tipc_node_filter_skb(n, l, hdr))
+ goto unlock;
+
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+ tipc_bclink_sync_state(n, hdr);
+
+ /* Release acked broadcast messages */
+ if (unlikely(n->bclink.acked != msg_bcast_ack(hdr)))
+ tipc_bclink_acknowledge(n, msg_bcast_ack(hdr));
+
+ /* Check protocol and update link state */
+ rc = tipc_link_rcv(l, skb, &xmitq);
+
+ if (unlikely(rc & TIPC_LINK_UP_EVT))
+ tipc_link_activate(l);
+ if (unlikely(rc & TIPC_LINK_DOWN_EVT))
+ tipc_link_reset(l);
+ skb = NULL;
+unlock:
+ tipc_node_unlock(n);
+ tipc_sk_rcv(net, &n->links[bearer_id].inputq);
+ maddr = &n->links[bearer_id].maddr;
+ tipc_bearer_xmit(net, bearer_id, &xmitq, maddr);
+ tipc_node_put(n);
+discard:
+ kfree_skb(skb);
+}
+
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int err;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5a834cf142c8..5e7016802077 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,6 +45,26 @@
/* Out-of-range value for node signature */
#define INVALID_NODE_SIG 0x10000
+#define INVALID_BEARER_ID -1
+
+/* Node FSM states and events:
+ */
+enum {
+ SELF_DOWN_PEER_DOWN = 0xdd,
+ SELF_UP_PEER_UP = 0xaa,
+ SELF_DOWN_PEER_LEAVING = 0xd1,
+ SELF_UP_PEER_COMING = 0xac,
+ SELF_COMING_PEER_UP = 0xca,
+ SELF_LEAVING_PEER_DOWN = 0x1d,
+};
+
+enum {
+ SELF_ESTABL_CONTACT_EVT = 0xec,
+ SELF_LOST_CONTACT_EVT = 0x1c,
+ PEER_ESTABL_CONTACT_EVT = 0xfec,
+ PEER_LOST_CONTACT_EVT = 0xf1c
+};
+
/* Flags used to take different actions according to flag type
* TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
* TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
@@ -54,8 +74,6 @@
*/
enum {
TIPC_MSG_EVT = 1,
- TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
- TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
TIPC_NOTIFY_NODE_UP = (1 << 4),
TIPC_WAKEUP_BCAST_USERS = (1 << 5),
@@ -85,10 +103,17 @@ struct tipc_node_bclink {
u32 deferred_size;
struct sk_buff_head deferdq;
struct sk_buff *reasm_buf;
- int inputq_map;
+ struct sk_buff_head namedq;
bool recv_permitted;
};
+struct tipc_link_entry {
+ struct tipc_link *link;
+ u32 mtu;
+ struct sk_buff_head inputq;
+ struct tipc_media_addr maddr;
+};
+
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
@@ -98,9 +123,8 @@ struct tipc_node_bclink {
* @hash: links to adjacent nodes in unsorted hash chain
* @inputq: pointer to input queue containing messages for msg event
* @namedq: pointer to name table input queue with name table messages
- * @curr_link: the link holding the node lock, if any
- * @active_links: pointers to active links to node
- * @links: pointers to all links to node
+ * @active_links: bearer ids of active links, used as index into links[] array
+ * @links: array containing references to all links to node
* @action_flags: bit mask of different types of node actions
* @bclink: broadcast-related info
* @list: links to adjacent nodes in sorted list of cluster's nodes
@@ -120,12 +144,12 @@ struct tipc_node {
struct hlist_node hash;
struct sk_buff_head *inputq;
struct sk_buff_head *namedq;
- struct tipc_link *active_links[2];
- u32 act_mtus[2];
- struct tipc_link *links[MAX_BEARERS];
+ int active_links[2];
+ struct tipc_link_entry links[MAX_BEARERS];
int action_flags;
struct tipc_node_bclink bclink;
struct list_head list;
+ int state;
int link_cnt;
u16 working_links;
u16 capabilities;
@@ -133,6 +157,8 @@ struct tipc_node {
u32 link_id;
struct list_head publ_list;
struct list_head conn_sks;
+ unsigned long keepalive_intv;
+ struct timer_list timer;
struct rcu_head rcu;
};
@@ -140,18 +166,25 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr);
void tipc_node_put(struct tipc_node *node);
struct tipc_node *tipc_node_create(struct net *net, u32 addr);
void tipc_node_stop(struct net *net);
+void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *bearer,
+ bool *link_up, bool *addr_match,
+ struct tipc_media_addr *maddr);
+bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer,
+ struct tipc_media_addr *maddr);
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-int tipc_node_active_links(struct tipc_node *n_ptr);
-int tipc_node_is_up(struct tipc_node *n_ptr);
+void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id);
+void tipc_node_link_up(struct tipc_node *n_ptr, int bearer_id);
+bool tipc_node_is_up(struct tipc_node *n);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
void tipc_node_unlock(struct tipc_node *node);
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
+ int selector);
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
+ u32 selector);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
static inline void tipc_node_lock(struct tipc_node *node)
@@ -159,26 +192,30 @@ static inline void tipc_node_lock(struct tipc_node *node)
spin_lock_bh(&node->lock);
}
-static inline bool tipc_node_blocked(struct tipc_node *node)
+static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
{
- return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
- TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
+ int bearer_id = n->active_links[sel & 1];
+
+ if (unlikely(bearer_id == INVALID_BEARER_ID))
+ return NULL;
+
+ return n->links[bearer_id].link;
}
-static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector)
+static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
{
- struct tipc_node *node;
- u32 mtu;
-
- node = tipc_node_find(net, addr);
+ struct tipc_node *n;
+ int bearer_id;
+ unsigned int mtu = MAX_MSG_SIZE;
- if (likely(node)) {
- mtu = node->act_mtus[selector & 1];
- tipc_node_put(node);
- } else {
- mtu = MAX_MSG_SIZE;
- }
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return mtu;
+ bearer_id = n->active_links[sel & 1];
+ if (likely(bearer_id != INVALID_BEARER_ID))
+ mtu = n->links[bearer_id].mtu;
+ tipc_node_put(n);
return mtu;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3a7567f690f3..5b0b08d58fcc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -261,7 +261,7 @@ static void tsk_rej_rx_queue(struct sock *sk)
while ((skb = __skb_dequeue(&sk->sk_receive_queue))) {
if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT))
- tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0);
+ tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0);
}
}
@@ -443,7 +443,7 @@ static int tipc_release(struct socket *sock)
}
if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
TIPC_ERR_NO_PORT))
- tipc_link_xmit_skb(net, skb, dnode, 0);
+ tipc_node_xmit_skb(net, skb, dnode, 0);
}
}
@@ -456,7 +456,7 @@ static int tipc_release(struct socket *sock)
tsk_own_node(tsk), tsk_peer_port(tsk),
tsk->portid, TIPC_ERR_NO_PORT);
if (skb)
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
tipc_node_remove_conn(net, dnode, tsk->portid);
}
@@ -686,21 +686,22 @@ new_mtu:
do {
rc = tipc_bclink_xmit(net, pktchain);
- if (likely(rc >= 0)) {
- rc = dsz;
- break;
+ if (likely(!rc))
+ return dsz;
+
+ if (rc == -ELINKCONG) {
+ tsk->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (!rc)
+ continue;
}
+ __skb_queue_purge(pktchain);
if (rc == -EMSGSIZE) {
msg->msg_iter = save;
goto new_mtu;
}
- if (rc != -ELINKCONG)
- break;
- tipc_sk(sk)->link_cong = 1;
- rc = tipc_wait_for_sndmsg(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
- } while (!rc);
+ break;
+ } while (1);
return rc;
}
@@ -924,24 +925,25 @@ new_mtu:
do {
skb = skb_peek(pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
- rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid);
- if (likely(rc >= 0)) {
+ rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+ if (likely(!rc)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
- rc = dsz;
- break;
+ return dsz;
}
+ if (rc == -ELINKCONG) {
+ tsk->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (!rc)
+ continue;
+ }
+ __skb_queue_purge(pktchain);
if (rc == -EMSGSIZE) {
m->msg_iter = save;
goto new_mtu;
}
- if (rc != -ELINKCONG)
- break;
- tsk->link_cong = 1;
- rc = tipc_wait_for_sndmsg(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
- } while (!rc);
+ break;
+ } while (1);
return rc;
}
@@ -1043,15 +1045,16 @@ next:
return rc;
do {
if (likely(!tsk_conn_cong(tsk))) {
- rc = tipc_link_xmit(net, pktchain, dnode, portid);
+ rc = tipc_node_xmit(net, pktchain, dnode, portid);
if (likely(!rc)) {
tsk->sent_unacked++;
sent += send;
if (sent == dsz)
- break;
+ return dsz;
goto next;
}
if (rc == -EMSGSIZE) {
+ __skb_queue_purge(pktchain);
tsk->max_pkt = tipc_node_get_mtu(net, dnode,
portid);
m->msg_iter = save;
@@ -1059,13 +1062,13 @@ next:
}
if (rc != -ELINKCONG)
break;
+
tsk->link_cong = 1;
}
rc = tipc_wait_for_sndpkt(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
} while (!rc);
+ __skb_queue_purge(pktchain);
return sent ? sent : rc;
}
@@ -1221,7 +1224,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
return;
msg = buf_msg(skb);
msg_set_msgcnt(msg, ack);
- tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1700,7 +1703,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err))
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
return 0;
}
@@ -1796,7 +1799,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err))
continue;
xmit:
- tipc_link_xmit_skb(net, skb, dnode, dport);
+ tipc_node_xmit_skb(net, skb, dnode, dport);
}
return err ? -EHOSTUNREACH : 0;
}
@@ -2089,7 +2092,7 @@ restart:
}
if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
TIPC_CONN_SHUTDOWN))
- tipc_link_xmit_skb(net, skb, dnode,
+ tipc_node_xmit_skb(net, skb, dnode,
tsk->portid);
} else {
dnode = tsk_peer_node(tsk);
@@ -2099,7 +2102,7 @@ restart:
0, dnode, tsk_own_node(tsk),
tsk_peer_port(tsk),
tsk->portid, TIPC_CONN_SHUTDOWN);
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
}
tsk->connected = 0;
sock->state = SS_DISCONNECTING;
@@ -2161,7 +2164,7 @@ static void tipc_sk_timeout(unsigned long data)
}
bh_unlock_sock(sk);
if (skb)
- tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
+ tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
sock_put(sk);
}