diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
62 files changed, 10911 insertions, 2128 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 9ca3734ebb6b..5098e7f21987 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -24,13 +24,6 @@ config MLX4_EN_DCB If unsure, set to Y -config MLX4_EN_VXLAN - bool "VXLAN offloads Support" - default y - depends on MLX4_EN && VXLAN && !(MLX4_EN=y && VXLAN=m) - ---help--- - Say Y here if you want to use VXLAN offloads in the driver. - config MLX4_CORE tristate depends on PCI diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index f01918c63f28..99c6bbdff501 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -37,6 +37,11 @@ #include "mlx4_en.h" #include "fw_qos.h" +enum { + MLX4_CEE_STATE_DOWN = 0, + MLX4_CEE_STATE_UP = 1, +}; + /* Definitions for QCN */ @@ -80,13 +85,202 @@ struct mlx4_congestion_control_mb_prio_802_1_qau_statistics { __be32 reserved3[4]; }; +static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + switch (capid) { + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->cee_params.dcbx_cap; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx4_max_tc(priv->mdev->dev); + break; + default: + *cap = false; + break; + } + + return 0; +} + +static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + return priv->cee_params.dcb_cfg.pfc_state; +} + +static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_params.dcb_cfg.pfc_state = state; +} + +static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + *setting = priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc; +} + +static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc = setting; + priv->cee_params.dcb_cfg.pfc_state = true; +} + +static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + if (!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return -EINVAL; + + if (tcid == DCB_NUMTCS_ATTR_PFC) + *num = mlx4_max_tc(priv->mdev->dev); + else + *num = 0; + + return 0; +} + +static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg; + int err = 0; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + if (dcb_cfg->pfc_state) { + int tc; + + priv->prof->rx_pause = 0; + priv->prof->tx_pause = 0; + for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) { + u8 tc_mask = 1 << tc; + + switch (dcb_cfg->tc_config[tc].dcb_pfc) { + case pfc_disabled: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_full: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + case pfc_enabled_tx: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_rx: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + default: + break; + } + } + en_dbg(DRV, priv, "Set pfc on\n"); + } else { + priv->prof->rx_pause = 1; + priv->prof->tx_pause = 1; + en_dbg(DRV, priv, "Set pfc off\n"); + } + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) + en_err(priv, "Failed setting pause params\n"); + return err; +} + +static u8 mlx4_en_dcbnl_get_state(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (priv->flags & MLX4_EN_FLAG_DCB_ENABLED) + return MLX4_CEE_STATE_UP; + + return MLX4_CEE_STATE_DOWN; +} + +static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int num_tcs = 0; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return 0; + + if (state) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + num_tcs = IEEE_8021QAZ_MAX_TCS; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + } + + return mlx4_en_setup_tc(dev, num_tcs); +} + +/* On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. + */ +static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 0; + + return dcb_getapp(netdev, &app); +} + +static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype, + u16 id, u8 up) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + memset(&app, 0, sizeof(struct dcb_app)); + app.selector = idtype; + app.protocol = id; + app.priority = up; + + return dcb_setapp(netdev, &app); +} + static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct mlx4_en_priv *priv = netdev_priv(dev); struct ieee_ets *my_ets = &priv->ets; - /* No IEEE PFC settings available */ if (!my_ets) return -EINVAL; @@ -237,18 +431,51 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) { - return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->cee_params.dcbx_cap; } static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) { + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets ets = {0}; + struct ieee_pfc pfc = {0}; + + if (mode == priv->cee_params.dcbx_cap) + return 0; + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || - (mode & DCB_CAP_DCBX_VER_CEE) || - !(mode & DCB_CAP_DCBX_VER_IEEE) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && + (mode & DCB_CAP_DCBX_VER_CEE)) || !(mode & DCB_CAP_DCBX_HOST)) - return 1; + goto err; + + priv->cee_params.dcbx_cap = mode; + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS; + + if (mode & DCB_CAP_DCBX_VER_IEEE) { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + } else if (mode & DCB_CAP_DCBX_VER_CEE) { + if (mlx4_en_dcbnl_set_all(dev)) + goto err; + } else { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + if (mlx4_en_setup_tc(dev, 0)) + goto err; + } return 0; +err: + return 1; } #define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */ @@ -463,24 +690,46 @@ static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev, } const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { - .ieee_getets = mlx4_en_dcbnl_ieee_getets, - .ieee_setets = mlx4_en_dcbnl_ieee_setets, - .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, - .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, - .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, - .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .ieee_getets = mlx4_en_dcbnl_ieee_getets, + .ieee_setets = mlx4_en_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, + .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, + .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, + .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getstate = mlx4_en_dcbnl_get_state, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getcap = mlx4_en_dcbnl_getcap, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, - .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, - .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, - .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, }; const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, + .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index fc95affaf76b..bdda17d2ea0f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1042,6 +1042,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; u32 rx_size, tx_size; int port_up = 0; int err = 0; @@ -1061,22 +1063,25 @@ static int mlx4_en_set_ringparam(struct net_device *dev, tx_size == priv->tx_ring[0]->size) return 0; + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.tx_ring_size = tx_size; + new_prof.rx_ring_size = rx_size; + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) + goto out; + if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - mlx4_en_free_resources(priv); - - priv->prof->tx_ring_size = tx_size; - priv->prof->rx_ring_size = rx_size; + mlx4_en_safe_replace_resources(priv, tmp); - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); - goto out; - } if (port_up) { err = mlx4_en_start_port(dev); if (err) @@ -1084,8 +1089,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev, } err = mlx4_en_moderation_update(priv); - out: + kfree(tmp); mutex_unlock(&mdev->state_lock); return err; } @@ -1107,7 +1112,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - return priv->rx_ring_num; + return rounddown_pow_of_two(priv->rx_ring_num); } static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) @@ -1141,19 +1146,17 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rss_map *rss_map = &priv->rss_map; - int rss_rings; - size_t n = priv->rx_ring_num; + u32 n = mlx4_en_get_rxfh_indir_size(dev); + u32 i, rss_rings; int err = 0; - rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; - rss_rings = 1 << ilog2(rss_rings); + rss_rings = priv->prof->rss_rings ?: n; + rss_rings = rounddown_pow_of_two(rss_rings); - while (n--) { + for (i = 0; i < n; i++) { if (!ring_index) break; - ring_index[n] = rss_map->qps[n % rss_rings].qpn - - rss_map->base_qpn; + ring_index[i] = i % rss_rings; } if (key) memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); @@ -1166,6 +1169,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); + u32 n = mlx4_en_get_rxfh_indir_size(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; @@ -1175,18 +1179,18 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, /* Calculate RSS table size and make sure flows are spread evenly * between rings */ - for (i = 0; i < priv->rx_ring_num; i++) { + for (i = 0; i < n; i++) { if (!ring_index) - continue; + break; if (i > 0 && !ring_index[i] && !rss_rings) rss_rings = i; - if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) + if (ring_index[i] != (i % (rss_rings ?: n))) return -EINVAL; } if (!rss_rings) - rss_rings = priv->rx_ring_num; + rss_rings = n; /* RSS table size must be an order of 2 */ if (!is_power_of_2(rss_rings)) @@ -1714,6 +1718,8 @@ static int mlx4_en_set_channels(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; int port_up = 0; int err = 0; @@ -1723,25 +1729,35 @@ static int mlx4_en_set_channels(struct net_device *dev, !channel->tx_count || !channel->rx_count) return -EINVAL; - mutex_lock(&mdev->state_lock); - if (priv->port_up) { - port_up = 1; - mlx4_en_stop_port(dev, 1); + if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) { + en_err(priv, "Minimum %d tx channels required with XDP on\n", + priv->xdp_ring_num / MLX4_EN_NUM_UP + 1); + return -EINVAL; } - mlx4_en_free_resources(priv); + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; - priv->num_tx_rings_p_up = channel->tx_count; - priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; - priv->rx_ring_num = channel->rx_count; + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_tx_rings_p_up = channel->tx_count; + new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.rx_ring_num = channel->rx_count; - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); } - netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + mlx4_en_safe_replace_resources(priv, tmp); + + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); if (dev->num_tc) @@ -1757,8 +1773,8 @@ static int mlx4_en_set_channels(struct net_device *dev, } err = mlx4_en_moderation_update(priv); - out: + kfree(tmp); mutex_unlock(&mdev->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0c0dfd6cdca6..4198e9bf89d0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -31,6 +31,7 @@ * */ +#include <linux/bpf.h> #include <linux/etherdevice.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -67,6 +68,17 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) offset += priv->num_tx_rings_p_up; } +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + if (up) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + priv->cee_params.dcb_cfg.pfc_state = false; + } + } +#endif /* CONFIG_MLX4_EN_DCB */ + return 0; } @@ -1201,8 +1213,8 @@ static void mlx4_en_netpoll(struct net_device *dev) struct mlx4_en_cq *cq; int i; - for (i = 0; i < priv->rx_ring_num; i++) { - cq = priv->rx_cq[i]; + for (i = 0; i < priv->tx_ring_num; i++) { + cq = priv->tx_cq[i]; napi_schedule(&cq->napi); } } @@ -1510,6 +1522,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); } +static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, + int tx_ring_idx) +{ + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx]; + int rr_index; + + rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx; + if (rr_index >= 0) { + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, + "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n", + tx_ring_idx, rr_index); + } else { + tx_ring->recycle_ring = NULL; + } +} + int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1632,6 +1662,8 @@ int mlx4_en_start_port(struct net_device *dev) } tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + mlx4_en_init_recycle_ring(priv, i); + /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); @@ -1696,10 +1728,9 @@ int mlx4_en_start_port(struct net_device *dev) /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); -#ifdef CONFIG_MLX4_EN_VXLAN if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - vxlan_get_rx_port(dev); -#endif + udp_tunnel_get_rx_info(dev); + priv->port_up = true; netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -1954,7 +1985,7 @@ static int mlx4_en_close(struct net_device *dev) return 0; } -void mlx4_en_free_resources(struct mlx4_en_priv *priv) +static void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; @@ -1979,7 +2010,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) } -int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) +static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; @@ -2044,6 +2075,77 @@ static void mlx4_en_shutdown(struct net_device *dev) rtnl_unlock(); } +static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src, + struct mlx4_en_port_profile *prof) +{ + memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up; + dst->tx_ring_num = prof->tx_ring_num; + dst->rx_ring_num = prof->rx_ring_num; + dst->flags = prof->flags; + dst->mdev = src->mdev; + dst->port = src->port; + dst->dev = src->dev; + dst->prof = prof; + dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + + dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, + GFP_KERNEL); + if (!dst->tx_ring) + return -ENOMEM; + + dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, + GFP_KERNEL); + if (!dst->tx_cq) { + kfree(dst->tx_ring); + return -ENOMEM; + } + return 0; +} + +static void mlx4_en_update_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src) +{ + memcpy(dst->rx_ring, src->rx_ring, + sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num); + memcpy(dst->rx_cq, src->rx_cq, + sizeof(struct mlx4_en_cq *) * src->rx_ring_num); + memcpy(&dst->hwtstamp_config, &src->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + dst->tx_ring_num = src->tx_ring_num; + dst->rx_ring_num = src->rx_ring_num; + dst->tx_ring = src->tx_ring; + dst->tx_cq = src->tx_cq; + memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); +} + +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof) +{ + mlx4_en_copy_priv(tmp, priv, prof); + + if (mlx4_en_alloc_resources(tmp)) { + en_warn(priv, + "%s: Resource allocation failed, using previous configuration\n", + __func__); + kfree(tmp->tx_ring); + kfree(tmp->tx_cq); + return -ENOMEM; + } + return 0; +} + +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp) +{ + mlx4_en_free_resources(priv); + mlx4_en_update_priv(priv, tmp); +} + void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -2080,6 +2182,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mdev->upper[priv->port] = NULL; mutex_unlock(&mdev->state_lock); +#ifdef CONFIG_RFS_ACCEL + mlx4_en_cleanup_filters(priv); +#endif + mlx4_en_free_resources(priv); kfree(priv->tx_ring); @@ -2102,6 +2208,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } + if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { + en_err(priv, "MTU size:%d requires frags but XDP running\n", + new_mtu); + return -EOPNOTSUPP; + } dev->mtu = new_mtu; if (netif_running(dev)) { @@ -2359,7 +2470,6 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev, return 0; } -#ifdef CONFIG_MLX4_EN_VXLAN static void mlx4_en_add_vxlan_offloads(struct work_struct *work) { int ret; @@ -2409,15 +2519,19 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) } static void mlx4_en_add_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) return; - if (sa_family == AF_INET6) + if (ti->sa_family != AF_INET) + return; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2432,15 +2546,19 @@ static void mlx4_en_add_vxlan_port(struct net_device *dev, } static void mlx4_en_del_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (ti->sa_family != AF_INET) return; - if (sa_family == AF_INET6) + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2475,7 +2593,6 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, return features; } -#endif static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) { @@ -2504,6 +2621,103 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m return err; } +static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct bpf_prog *old_prog; + int xdp_ring_num; + int port_up = 0; + int err; + int i; + + xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0; + + /* No need to reconfigure buffers when simply swapping the + * program for a new one. + */ + if (priv->xdp_ring_num == xdp_ring_num) { + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + for (i = 0; i < priv->rx_ring_num; i++) { + /* This xchg is paired with READ_ONCE in the fastpath */ + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + return 0; + } + + if (priv->num_frags > 1) { + en_err(priv, "Cannot set XDP if MTU requires multiple frags\n"); + return -EOPNOTSUPP; + } + + if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) { + en_err(priv, + "Minimum %d tx channels required to run XDP\n", + (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP); + return -EINVAL; + } + + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + priv->xdp_ring_num = xdp_ring_num; + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); + + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port %d for XDP change\n", + priv->port); + queue_work(mdev->workqueue, &priv->watchdog_task); + } + } + + mutex_unlock(&mdev->state_lock); + return 0; +} + +static bool mlx4_xdp_attached(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return !!priv->xdp_ring_num; +} + +static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx4_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx4_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2528,12 +2742,11 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2566,12 +2779,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; struct mlx4_en_bond { @@ -2836,6 +3048,9 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_priv *priv; int i; int err; +#ifdef CONFIG_MLX4_EN_DCB + struct tc_configuration *tc; +#endif dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), MAX_TX_RINGS, MAX_RX_RINGS); @@ -2861,10 +3076,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); -#ifdef CONFIG_MLX4_EN_VXLAN INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); -#endif #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); @@ -2904,6 +3117,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { + priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + priv->flags |= MLX4_EN_DCB_ENABLED; + priv->cee_params.dcb_cfg.pfc_state = false; + + for (i = 0; i < MLX4_EN_NUM_UP; i++) { + tc = &priv->cee_params.dcb_cfg.tc_config[i]; + tc->dcb_pfc = pfc_disabled; + } + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { @@ -3124,6 +3348,8 @@ int mlx4_en_reset_config(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; int port_up = 0; int err = 0; @@ -3140,19 +3366,29 @@ int mlx4_en_reset_config(struct net_device *dev, return -EINVAL; } + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + mutex_lock(&mdev->state_lock); + + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) + goto out; + if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - mlx4_en_free_resources(priv); - en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n", - ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX)); + ts_config.rx_filter, + !!(features & NETIF_F_HW_VLAN_CTAG_RX)); - priv->hwtstamp_config.tx_type = ts_config.tx_type; - priv->hwtstamp_config.rx_filter = ts_config.rx_filter; + mlx4_en_safe_replace_resources(priv, tmp); if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_HW_VLAN_CTAG_RX) @@ -3186,11 +3422,6 @@ int mlx4_en_reset_config(struct net_device *dev, dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); - goto out; - } if (port_up) { err = mlx4_en_start_port(dev); if (err) @@ -3199,6 +3430,8 @@ int mlx4_en_reset_config(struct net_device *dev, out: mutex_unlock(&mdev->state_lock); - netdev_features_change(dev); + kfree(tmp); + if (!err) + netdev_features_change(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c1b3a9c8cf3b..2040dad8611d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -32,6 +32,7 @@ */ #include <net/busy_poll.h> +#include <linux/bpf.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -57,7 +58,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct page *page; dma_addr_t dma; - for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) { + for (order = frag_info->order; ;) { gfp_t gfp = _gfp; if (order) @@ -70,7 +71,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (dma_mapping_error(priv->ddev, dma)) { put_page(page); return -ENOMEM; @@ -124,7 +125,8 @@ out: while (i--) { if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, + priv->frag_info[i].dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc[i].page_size / @@ -145,7 +147,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (frags[i].page) put_page(frags[i].page); @@ -176,7 +178,8 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, + priv->frag_info[i].dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc->page_size / @@ -201,7 +204,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, frag_info->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < page_alloc->page_size) { put_page(page_alloc->page); @@ -244,6 +247,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); + if (ring->page_cache.index > 0) { + frags[0] = ring->page_cache.buf[--ring->page_cache.index]; + rx_desc->data[0].addr = cpu_to_be64(frags[0].dma); + return 0; + } + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } @@ -502,26 +511,55 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } +/* When the rx ring is running in page-per-packet mode, a released frame can go + * directly into a small cache, to avoid unmapping or touching the page + * allocator. In bpf prog performance scenarios, buffers are either forwarded + * or dropped, never converted to skbs, so every page can come directly from + * this cache when it is sized to be a multiple of the napi budget. + */ +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame) +{ + struct mlx4_en_page_cache *cache = &ring->page_cache; + + if (cache->index >= MLX4_EN_CACHE_SIZE) + return false; + + cache->buf[cache->index++] = *frame; + return true; +} + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; + struct bpf_prog *old_prog; + old_prog = READ_ONCE(ring->xdp_prog); + if (old_prog) + bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; kfree(ring); *pring = NULL; -#ifdef CONFIG_RFS_ACCEL - mlx4_en_cleanup_filters(priv); -#endif } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { + int i; + + for (i = 0; i < ring->page_cache.index; i++) { + struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; + + dma_unmap_page(priv->ddev, frame->dma, frame->page_size, + priv->frag_info[0].dma_dir); + put_page(frame->page); + } + ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -743,7 +781,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; + struct bpf_prog *xdp_prog; + int doorbell_pending; struct sk_buff *skb; + int tx_index; int index; int nr; unsigned int length; @@ -759,6 +800,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; + xdp_prog = READ_ONCE(ring->xdp_prog); + doorbell_pending = 0; + tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ @@ -835,6 +880,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + /* A bpf program gets first chance to drop the packet. It may + * read bytes but not past the end of the frag. + */ + if (xdp_prog) { + struct xdp_buff xdp; + dma_addr_t dma; + u32 act; + + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_for_cpu(priv->ddev, dma, + priv->frag_info[0].frag_size, + DMA_FROM_DEVICE); + + xdp.data = page_address(frags[0].page) + + frags[0].page_offset; + xdp.data_end = xdp.data + length; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + if (!mlx4_en_xmit_frame(frags, dev, + length, tx_index, + &doorbell_pending)) + goto consumed; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + if (mlx4_en_rx_recycle(ring, frags)) + goto consumed; + goto next; + } + } + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { @@ -986,6 +1068,7 @@ next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); +consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; @@ -994,6 +1077,9 @@ next: } out: + if (doorbell_pending) + mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -1061,22 +1147,35 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { + enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE; struct mlx4_en_priv *priv = netdev_priv(dev); - /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple - * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). - */ - int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); + int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); + int order = MLX4_EN_ALLOC_PREFER_ORDER; + u32 align = SMP_CACHE_BYTES; int buf_size = 0; int i = 0; + /* bpf requires buffers to be set up as 1 packet per page. + * This only works when num_frags == 1. + */ + if (priv->xdp_ring_num) { + dma_dir = PCI_DMA_BIDIRECTIONAL; + /* This will gain efficient xdp frame recycling at the expense + * of more costly truesize accounting + */ + align = PAGE_SIZE; + order = 0; + } + while (buf_size < eff_mtu) { + priv->frag_info[i].order = order; priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); + ALIGN(priv->frag_info[i].frag_size, align); + priv->frag_info[i].dma_dir = dma_dir; buf_size += priv->frag_info[i].frag_size; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 76aa4d27183c..9df87ca0515a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->last_nr_txbb = 1; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8); @@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, - int napi_mode) +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + .page_offset = 0, + .page_size = PAGE_SIZE, + }; + + if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->frag_info[0].dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { @@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } while (ring->cons != ring->prod) { - ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0, 0 /* Non-NAPI caller */); @@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = mlx4_en_free_tx_desc( + last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & ring->size), timestamp, napi_budget); @@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + return done < budget; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this stopped, and ring is not full. @@ -631,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb, static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, const struct sk_buff *skb, const struct skb_shared_info *shinfo, - int real_size, u16 *vlan_tag, - int tx_ind, void *fragptr) + void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -700,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src, __iowrite64_copy(dst, src, bytecnt / 8); } +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. + */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + ring->doorbell_qpn, + ring->bf.uar->map + MLX4_SEND_DOORBELL); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; @@ -715,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_tag = 0; u16 vlan_proto = 0; int i_frag; int lso_header_size; @@ -725,6 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u32 ring_cons; + bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; @@ -749,9 +829,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } + bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { - vlan_tag = skb_vlan_tag_get(skb); + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -771,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; + bf_ok = false; } /* Save skb in tx_info ring */ @@ -907,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) - build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag, - tx_ind, fragptr); + build_inline_wqe(tx_desc, skb, shinfo, fragptr); if (skb->encapsulation) { union { @@ -946,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !skb_vlan_tag_present(skb) && send_doorbell) { - tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | - cpu_to_be32(real_size); - - op_own |= htonl((bf_index & 0xffff) << 8); - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - - wmb(); + bf_ok &= desc_size <= MAX_BF && send_doorbell; - mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, - desc_size); - - wmb(); - - ring->bf.offset ^= ring->bf.buf_size; - } else { - tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - if (vlan_proto == ETH_P_8021AD) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; - else if (vlan_proto == ETH_P_8021Q) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; - else - tx_desc->ctrl.ins_vlan = 0; - - tx_desc->ctrl.fence_size = real_size; + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - if (send_doorbell) { - wmb(); - /* Since there is no iowrite*_native() that writes the - * value as is, without byteswapping - using the one - * the doesn't do byteswapping in the relevant arch - * endianness. - */ -#if defined(__LITTLE_ENDIAN) - iowrite32( -#else - iowrite32be( -#endif - ring->doorbell_qpn, - ring->bf.uar->map + MLX4_SEND_DOORBELL); - } else { - ring->xmit_more++; - } - } + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); if (unlikely(stop_queue)) { /* If queue was emptied after the if (stop_queue) , and before @@ -1034,3 +1077,106 @@ tx_drop: return NETDEV_TX_OK; } +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + int index, bf_index; + bool send_doorbell; + int nr_txbb = 1; + bool stop_queue; + dma_addr_t dma; + int real_size; + __be32 op_own; + u32 ring_cons; + bool bf_ok; + + BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, + "mlx4_en_xmit_frame requires minimum size tx desc"); + + ring = priv->tx_ring[tx_ind]; + + if (!priv->port_up) + goto tx_drop; + + if (mlx4_en_is_tx_ring_full(ring)) + goto tx_drop; + + /* fetch ring->cons far ahead before needing it to avoid stall */ + ring_cons = READ_ONCE(ring->cons); + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + bf_ok = ring->bf_enabled; + + /* Track current inflight packets for performance analysis */ + AVG_PERF_COUNTER(priv->pstats.inflight_avg, + (u32)(ring->prod - ring_cons - 1)); + + bf_index = ring->prod; + tx_desc = ring->buf + index * TXBB_SIZE; + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->map0_byte_count = length; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + + data->addr = cpu_to_be64(dma); + data->lkey = ring->mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + ring->packets++; + ring->bytes += tx_info->nr_bytes; + AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); + + ring->prod += nr_txbb; + + stop_queue = mlx4_en_is_tx_ring_full(ring); + send_doorbell = stop_queue || + *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; + bf_ok &= send_doorbell; + + real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, + op_own, bf_ok, send_doorbell); + *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + + return NETDEV_TX_OK; + +tx_drop: + ring->tx_dropped++; + return NETDEV_TX_BUSY; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e97094598b2d..f4497cf4d06d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1128,6 +1128,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c port_cap->max_pkeys = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); port_cap->max_vl = field & 0xf; + port_cap->max_tc_eth = field >> 4; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); port_cap->log_max_macs = field & 0xf; port_cap->log_max_vlans = field >> 4; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 7ea258af636a..cdbd76f10ced 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -53,6 +53,7 @@ struct mlx4_port_cap { int ib_mtu; int max_port_width; int max_vl; + int max_tc_eth; int max_gids; int max_pkeys; u64 def_mac; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index dec77d6f0ac9..0e8b7c44931f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) if (enable) { dev->flags |= MLX4_FLAG_BONDED; } else { - ret = mlx4_virt2phy_port_map(dev, 1, 2); + ret = mlx4_virt2phy_port_map(dev, 1, 2); if (ret) { mlx4_err(dev, "Fail to reset port map\n"); return ret; @@ -218,6 +218,9 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)) + return; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 546fab0ecc3b..75dd2e3d3059 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -292,6 +292,7 @@ static int _mlx4_dev_port(struct mlx4_dev *dev, int port, dev->caps.pkey_table_len[port] = port_cap->max_pkeys; dev->caps.port_width_cap[port] = port_cap->max_port_width; dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; + dev->caps.max_tc_eth = port_cap->max_tc_eth; dev->caps.def_mac[port] = port_cap->def_mac; dev->caps.supported_type[port] = port_cap->supported_port_types; dev->caps.suggested_type[port] = port_cap->suggested_type; @@ -2599,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); - return err; + return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index f2d0920018a5..94b891c118c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; @@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; } } } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 467d47ed2c39..2c2913dcae98 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,7 @@ enum { MLX4_EN_NUM_UP) #define MLX4_EN_DEFAULT_TX_WORK 256 +#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -164,6 +165,10 @@ enum { #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 +/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). + */ +#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN)) #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 @@ -215,7 +220,10 @@ enum cq_type { struct mlx4_en_tx_info { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct page *page; + }; dma_addr_t map0_dma; u32 map0_byte_count; u32 nr_txbb; @@ -255,6 +263,14 @@ struct mlx4_en_rx_alloc { u32 page_size; }; +#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) +struct mlx4_en_page_cache { + u32 index; + struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; +}; + +struct mlx4_en_priv; + struct mlx4_en_tx_ring { /* cache line used and dirtied in tx completion * (mlx4_en_free_tx_buf()) @@ -288,6 +304,11 @@ struct mlx4_en_tx_ring { __be32 mr_key; void *buf; struct mlx4_en_tx_info *tx_info; + struct mlx4_en_rx_ring *recycle_ring; + u32 (*free_tx_desc)(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, + u64 timestamp, int napi_mode); u8 *bounce_buf; struct mlx4_qp_context context; int qpn; @@ -319,6 +340,8 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; + struct bpf_prog *xdp_prog; + struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -353,12 +376,14 @@ struct mlx4_en_port_profile { u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; + u8 num_tx_rings_p_up; u8 rx_pause; u8 rx_ppp; u8 tx_pause; u8 tx_ppp; int rss_rings; int inline_thold; + struct hwtstamp_config hwtstamp_config; }; struct mlx4_en_profile { @@ -438,7 +463,9 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; - u16 frag_stride; + u32 frag_stride; + enum dma_data_direction dma_dir; + int order; }; #ifdef CONFIG_MLX4_EN_DCB @@ -448,6 +475,27 @@ struct mlx4_en_frag_info { #define MLX4_EN_TC_ETS 7 +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +struct tc_configuration { + enum dcb_pfc_type dcb_pfc; +}; + +struct mlx4_en_cee_config { + bool pfc_state; + struct tc_configuration tc_config[MLX4_EN_NUM_UP]; +}; + +struct mlx4_en_cee_params { + u8 dcbx_cap; + struct mlx4_en_cee_config dcb_cfg; +}; + #endif struct ethtool_flow_id { @@ -467,6 +515,9 @@ enum { MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), +#ifdef CONFIG_MLX4_EN_DCB + MLX4_EN_FLAG_DCB_ENABLED = (1 << 6), +#endif }; #define PORT_BEACON_MAX_LIMIT (65535) @@ -534,6 +585,7 @@ struct mlx4_en_priv { struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 num_frags; u16 log_rx_info; + int xdp_ring_num; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; @@ -545,10 +597,8 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; -#ifdef CONFIG_MLX4_EN_VXLAN struct work_struct vxlan_add_task; struct work_struct vxlan_del_task; -#endif struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_counter_stats pf_stats; @@ -570,9 +620,11 @@ struct mlx4_en_priv { u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB +#define MLX4_EN_DCB_ENABLED 0x3 struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS]; + struct mlx4_en_cee_params cee_params; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; @@ -623,8 +675,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u8 rx_ppp, u8 rx_pause, u8 tx_ppp, u8 tx_pause); -void mlx4_en_free_resources(struct mlx4_en_priv *priv); -int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof); +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, int entries, int ring, enum cq_type mode, int node); @@ -639,6 +694,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending); +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, @@ -667,6 +728,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 93195191f45b..395b5463cfd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) offset, order); return; } - __mlx4_free_mtt_range(dev, offset, order); + __mlx4_free_mtt_range(dev, offset, order); } void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 087b23b320cb..3d2095e5c61c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 #define MLX4_IGNORE_FCS_MASK 0x1 +#define MLNX4_TX_MAX_NUMBER 8 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { @@ -2015,3 +2016,14 @@ out: return ret; } EXPORT_SYMBOL(mlx4_get_module_info); + +int mlx4_max_tc(struct mlx4_dev *dev) +{ + u8 num_tc = dev->caps.max_tc_eth; + + if (!num_tc) + num_tc = MLNX4_TX_MAX_NUMBER; + + return num_tc; +} +EXPORT_SYMBOL(mlx4_max_tc); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cd9b2b28df88..8b81114bdc72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: - index = get_param_l(&in_param); - id = index & mpt_mask(dev); - err = mr_res_start_move_to(dev, slave, id, - RES_MPT_RESERVED, &mpt); - if (err) - return err; - - __mlx4_mpt_free_icm(dev, mpt->key); - res_end_move(dev, slave, RES_MPT, id); + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_RESERVED, &mpt); + if (err) return err; + + __mlx4_mpt_free_icm(dev, mpt->key); + res_end_move(dev, slave, RES_MPT, id); break; default: err = -EINVAL; @@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { - mlx4_warn(dev, - "Src check LB for slave %d isn't supported\n", - slave); + mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", + slave); return -ENOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722eba607..aae46884bf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9ea7b583096a..05cc1effc13c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -1,11 +1,13 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o vxlan.o en_tc.o en_arfs.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 943b1bd434bf..1b495efa7490 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/transobj.h> #include <linux/rhashtable.h> +#include <net/switchdev.h> #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -79,6 +80,7 @@ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 @@ -88,6 +90,7 @@ #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 @@ -126,6 +129,12 @@ static inline int mlx5_max_log_rq_size(int wq_type) } } +enum { + MLX5E_INLINE_MODE_L2, + MLX5E_INLINE_MODE_VPORT_CONTEXT, + MLX5_INLINE_MODE_NOT_REQUIRED, +}; + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -143,10 +152,31 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_data_seg data; }; +static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { + "rx_cqe_moder", +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), +}; + +#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \ + do { \ + if (enable) \ + priv->pflags |= pflag; \ + else \ + priv->pflags &= ~pflag; \ + } while (0) + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ #endif +struct mlx5e_cq_moder { + u16 usec; + u16 pkts; +}; + struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; @@ -155,16 +185,16 @@ struct mlx5e_params { u8 log_rq_size; u16 num_channels; u8 num_tc; + u8 rx_cq_period_mode; bool rx_cqe_compress_admin; bool rx_cqe_compress; - u16 rx_cq_moderation_usec; - u16 rx_cq_moderation_pkts; - u16 tx_cq_moderation_usec; - u16 tx_cq_moderation_pkts; + struct mlx5e_cq_moder rx_cq_moderation; + struct mlx5e_cq_moder tx_cq_moderation; u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; + u8 tx_min_inline_mode; u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; @@ -172,6 +202,7 @@ struct mlx5e_params { #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif + bool rx_am_enabled; }; struct mlx5e_tstamp { @@ -191,6 +222,7 @@ enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_FLUSH_TIMEOUT, + MLX5E_RQ_STATE_AM, }; struct mlx5e_cq { @@ -198,6 +230,7 @@ struct mlx5e_cq { struct mlx5_cqwq wq; /* data path - accessed per napi poll */ + u16 event_ctr; struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; @@ -227,6 +260,30 @@ struct mlx5e_dma_info { dma_addr_t addr; }; +struct mlx5e_rx_am_stats { + int ppms; /* packets per msec */ + int epms; /* events per msec */ +}; + +struct mlx5e_rx_am_sample { + ktime_t time; + unsigned int pkt_ctr; + u16 event_ctr; +}; + +struct mlx5e_rx_am { /* Adaptive Moderation */ + u8 state; + struct mlx5e_rx_am_stats prev_stats; + struct mlx5e_rx_am_sample start_sample; + struct work_struct work; + u8 profile_ix; + u8 mode; + u8 tune_state; + u8 steps_right; + u8 steps_left; + u8 tired; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -248,6 +305,8 @@ struct mlx5e_rq { unsigned long state; int ix; + struct mlx5e_rx_am am; /* Adaptive Moderation */ + /* control */ struct mlx5_wq_ctrl wq_ctrl; u8 wq_type; @@ -346,6 +405,7 @@ struct mlx5e_sq { u32 sqn; u16 bf_buf_size; u16 max_inline; + u8 min_inline_mode; u16 edge; struct device *pdev; struct mlx5e_tstamp *tstamp; @@ -358,6 +418,7 @@ struct mlx5e_sq { struct mlx5e_channel *channel; int tc; struct mlx5e_ico_wqe_info *ico_wqe_info; + u32 rate_limit; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -495,8 +556,24 @@ enum { MLX5E_ARFS_FT_LEVEL }; +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; + struct mlx5e_ethtool_steering ethtool; struct mlx5e_tc_table tc; struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; @@ -504,9 +581,15 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tables arfs; }; -struct mlx5e_direct_tir { - u32 tirn; +struct mlx5e_rqt { u32 rqtn; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; }; enum { @@ -514,6 +597,22 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; @@ -522,18 +621,15 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uar cq_uar; - u32 pdn; - u32 tdn; - struct mlx5_core_mkey mkey; struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 indir_rqtn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; @@ -545,11 +641,14 @@ struct mlx5e_priv { struct work_struct tx_timeout_work; struct delayed_work update_stats_work; + u32 pflags; struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; u16 q_counter; + const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { @@ -567,6 +666,7 @@ enum mlx5e_link_mode { MLX5E_10GBASE_ER = 14, MLX5E_40GBASE_SR4 = 15, MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, MLX5E_100GBASE_CR4 = 20, MLX5E_100GBASE_SR4 = 21, MLX5E_100GBASE_KR4 = 22, @@ -584,6 +684,9 @@ enum mlx5e_link_mode { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +void mlx5e_build_ptys2ethtool_map(void); + void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); @@ -621,12 +724,26 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); +void mlx5e_rx_am(struct mlx5e_rq *rq); +void mlx5e_rx_am_work(struct work_struct *work); +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode); + void mlx5e_update_stats(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location); +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs); +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location); +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, @@ -656,6 +773,9 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); + static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { @@ -732,5 +852,39 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); + +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 3515e78ba68f..a8cb38789774 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; int i; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.tir_num = tirn[i]; + dest.tir_num = tir[i].tirn; tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], @@ -175,15 +175,12 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - u32 *tirn = priv->indir_tirn; - u32 *match_criteria; - u32 *match_value; + struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; @@ -192,24 +189,23 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case ARFS_IPV4_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; break; case ARFS_IPV4_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; break; case ARFS_IPV6_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; break; case ARFS_IPV6_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; break; default: err = -EINVAL; goto out; } - arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable, - match_criteria, match_value, + arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); @@ -220,8 +216,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, __func__, type); } out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -475,23 +470,20 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule = NULL; struct mlx5_flow_destination dest; struct arfs_table *arfs_table; - u8 match_criteria_enable = 0; + struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; - u32 *match_criteria; - u32 *match_value; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; } - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ntohs(tuple->etype)); arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); if (!arfs_table) { @@ -501,59 +493,58 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, ft = arfs_table->ft.t; if (tuple->ip_proto == IPPROTO_TCP) { - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.tcp_dport); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.tcp_sport); - MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, ntohs(tuple->dst_port)); - MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, ntohs(tuple->src_port)); } else { - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_sport); - MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, ntohs(tuple->dst_port)); - MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, ntohs(tuple->src_port)); } if (tuple->etype == htons(ETH_P_IP)) { - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), &tuple->src_ipv4, 4); - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &tuple->dst_ipv4, 4); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); } else { - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), &tuple->src_ipv6, 16); - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &tuple->dst_ipv6, 16); - memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); - memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); if (IS_ERR(rule)) { @@ -563,8 +554,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, } out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err ? ERR_PTR(err) : rule; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000000..673043ccd76c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen) +{ + int err; + + err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn); + if (err) + return err; + + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + + return 0; +} + +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) +{ + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); +} + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + int err; + + err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); + if (err) { + mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + goto err_unmap_free_uar; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + + return 0; + +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &res->cq_uar); + + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + + mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + mlx5_unmap_free_uar(mdev, &res->cq_uar); +} + +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) +{ + struct mlx5e_tir *tir; + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + if (err) + return err; + } + + kvfree(in); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index c585349e05c3..caa9a3ccc3f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -195,7 +195,6 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - enum mlx5_port_status ps; u8 curr_pfc_en; int ret; @@ -204,14 +203,8 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if (pfc->pfc_en == curr_pfc_en) return 0; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); - - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_toggle_port_link(mdev); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index e667a870e0c2..4a3757e60441 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -48,123 +48,85 @@ static void mlx5e_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } -static const struct { - u32 supported; - u32 advertised; +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); u32 speed; -} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_1000BASE_KX] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_CX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_KX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_KR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_20GBASE_KR2] = { - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, - }, - [MLX5E_40GBASE_CR4] = { - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_KR4] = { - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, - }, - [MLX5E_56GBASE_R4] = { - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, - }, - [MLX5E_10GBASE_CR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_SR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_ER] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_40GBASE_SR4] = { - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_LR4] = { - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, - }, - [MLX5E_100GBASE_CR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_SR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_KR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_LR4] = { - .speed = 100000, - }, - [MLX5E_100BASE_TX] = { - .speed = 100, - }, - [MLX5E_1000BASE_T] = { - .supported = SUPPORTED_1000baseT_Full, - .advertised = ADVERTISED_1000baseT_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_T] = { - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 1000, - }, - [MLX5E_25GBASE_CR] = { - .speed = 25000, - }, - [MLX5E_25GBASE_KR] = { - .speed = 25000, - }, - [MLX5E_25GBASE_SR] = { - .speed = 25000, - }, - [MLX5E_50GBASE_CR2] = { - .speed = 50000, - }, - [MLX5E_50GBASE_KR2] = { - .speed = 50000, - }, }; +static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_table[reg_]; \ + cfg->speed = speed_; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, SPEED_20000, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); +} + static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -177,6 +139,18 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) return err ? 0 : pfc_en_tx | pfc_en_rx; } +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) #define MLX5E_NUM_RQ_STATS(priv) \ (NUM_RQ_STATS * priv->params.num_channels * \ @@ -185,8 +159,8 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, &priv->state)) #define MLX5E_NUM_PFC_COUNTERS(priv) \ - (hweight8(mlx5e_query_pfc_combined(priv)) * \ - NUM_PPORT_PER_PRIO_PFC_COUNTERS) + ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { @@ -200,6 +174,8 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv); + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(mlx5e_priv_flags); /* fallthrough */ default: return -EOPNOTSUPP; @@ -246,8 +222,18 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, prio); + pport_per_prio_pfc_stats_desc[i].format, "global"); } } @@ -272,9 +258,12 @@ static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct mlx5e_priv *priv = netdev_priv(dev); + int i; switch (stringset) { case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); break; case ETH_SS_TEST: @@ -339,6 +328,13 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, } } + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, 0); + } + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -519,10 +515,11 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -ENOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; return 0; } @@ -533,6 +530,10 @@ static int mlx5e_set_coalesce(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channel *c; + bool restart = + !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; + bool was_opened; + int err = 0; int tc; int i; @@ -540,12 +541,19 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return -ENOTSUPP; mutex_lock(&priv->state_lock); - priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened && restart) { + mlx5e_close_locked(netdev); + priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + } + + priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; + + if (!was_opened || restart) goto out; for (i = 0; i < priv->params.num_channels; ++i) { @@ -564,35 +572,37 @@ static int mlx5e_set_coalesce(struct net_device *netdev, } out: + if (was_opened && restart) + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); - return 0; + return err; } -static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +static void ptys2ethtool_supported_link(unsigned long *supported_modes, + u32 eth_proto_cap) { - int i; - u32 supported_modes = 0; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - supported_modes |= ptys2ethtool_table[i].supported; - } - return supported_modes; + for_each_set_bit(proto, (unsigned long *)ð_proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(supported_modes, supported_modes, + ptys2ethtool_table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap) { - int i; - u32 advertising_modes = 0; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - advertising_modes |= ptys2ethtool_table[i].advertised; - } - return advertising_modes; + for_each_set_bit(proto, (unsigned long *)ð_proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(advertising_modes, advertising_modes, + ptys2ethtool_table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +static void ptys2ethtool_supported_port(struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) @@ -600,7 +610,7 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, FIBRE); } if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) @@ -608,9 +618,8 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { - return SUPPORTED_Backplane; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Backplane); } - return 0; } int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) @@ -634,7 +643,7 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *link_ksettings) { int i; u32 speed = SPEED_UNKNOWN; @@ -651,23 +660,32 @@ static void get_speed_duplex(struct net_device *netdev, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, u32 *supported) +static void get_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) { - *supported |= ptys2ethtool_supported_port(eth_proto_cap); - *supported |= ptys2ethtool_supported_link(eth_proto_cap); - *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + unsigned long *supported = link_ksettings->link_modes.supported; + + ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); + ptys2ethtool_supported_link(supported, eth_proto_cap); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); } static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, u32 *advertising) + u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings) { - *advertising |= ptys2ethtool_adver_link(eth_proto_cap); - *advertising |= tx_pause ? ADVERTISED_Pause : 0; - *advertising |= (tx_pause ^ rx_pause) ? ADVERTISED_Asym_Pause : 0; + unsigned long *advertising = link_ksettings->link_modes.advertising; + + ptys2ethtool_adver_link(advertising, eth_proto_cap); + if (tx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); } static u8 get_connector_port(u32 eth_proto) @@ -695,13 +713,16 @@ static u8 get_connector_port(u32 eth_proto) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +static void get_lp_advertising(u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) { - *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); } -static int mlx5e_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -710,6 +731,8 @@ static int mlx5e_get_settings(struct net_device *netdev, u32 eth_proto_admin; u32 eth_proto_lp; u32 eth_proto_oper; + u8 an_disable_admin; + u8 an_status; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); @@ -720,35 +743,49 @@ static int mlx5e_get_settings(struct net_device *netdev, goto err_query_ptys; } - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); - cmd->supported = 0; - cmd->advertising = 0; + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, &cmd->supported); - get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); - get_speed_duplex(netdev, eth_proto_oper, cmd); + get_supported(eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, 0, 0, link_ksettings); + get_speed_duplex(netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - cmd->port = get_connector_port(eth_proto_oper); - get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); + link_ksettings->base.port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, link_ksettings); - cmd->transceiver = XCVR_INTERNAL; + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); err_query_ptys: return err; } -static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (ptys2ethtool_table[i].advertised & link_modes) + if (bitmap_intersects(ptys2ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); } @@ -767,21 +804,25 @@ static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) return speed_links; } -static int mlx5e_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + u32 eth_proto_cap, eth_proto_admin; + bool an_changes = false; + u8 an_disable_admin; + u8 an_disable_cap; + bool an_disable; u32 link_modes; + u8 an_status; u32 speed; - u32 eth_proto_cap, eth_proto_admin; - enum mlx5_port_status ps; int err; - speed = ethtool_cmd_speed(cmd); + speed = link_ksettings->base.speed; - link_modes = cmd->autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_ethtool2ptys_speed_link(speed); err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); @@ -806,15 +847,18 @@ static int mlx5e_set_settings(struct net_device *netdev, goto out; } - if (link_modes == eth_proto_admin) + mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, + &an_disable_cap, &an_disable_admin); + + an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eth_proto_admin) goto out; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_toggle_port_link(mdev); out: return err; @@ -861,7 +905,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) mlx5e_build_tir_ctx_hash(tirc, priv); for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen); + mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -883,7 +927,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { - u32 rqtn = priv->indir_rqtn; + u32 rqtn = priv->indir_rqt.rqtn; memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); @@ -916,6 +960,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: info->data = priv->params.num_channels; break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = priv->fs.ethtool.tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; default: err = -EOPNOTSUPP; break; @@ -1272,6 +1325,107 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_mode_changed; + u8 rx_cq_period_mode; + int err = 0; + bool reset; + + rx_cq_period_mode = enable ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) + return -ENOTSUPP; + + if (!rx_mode_changed) + return 0; + + reset = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (reset) + mlx5e_close_locked(netdev); + + mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + + if (reset) + err = mlx5e_open_locked(netdev); + + return err; +} + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag, + mlx5e_pflag_handler pflag_handler) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & flag); + u32 changes = wanted_flags ^ priv->pflags; + int err; + + if (!(changes & flag)) + return 0; + + err = pflag_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag 0x%x failed err %d\n", + enable ? "Enable" : "Disable", flag, err); + return err; + } + + MLX5E_SET_PRIV_FLAG(priv, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_CQE_BASED_MODER, + set_pflag_rx_cqe_based_moder); + + mutex_unlock(&priv->state_lock); + return err ? -EINVAL : 0; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->pflags; +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1284,13 +1438,14 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_channels = mlx5e_set_channels, .get_coalesce = mlx5e_get_coalesce, .set_coalesce = mlx5e_set_coalesce, - .get_settings = mlx5e_get_settings, - .set_settings = mlx5e_set_settings, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, .get_rxfh_key_size = mlx5e_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, @@ -1301,4 +1456,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index b32740092854..1587a9fd5724 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -156,19 +156,18 @@ enum mlx5e_vlan_rule_type { static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, - u16 vid, u32 *mc, u32 *mv) + u16 vid, struct mlx5_flow_spec *spec) { struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.l2.ft.t; - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: @@ -176,17 +175,19 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: rule_p = &priv->fs.vlan.any_vlan_rule; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ rule_p = &priv->fs.vlan.active_vlans_rule[vid]; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); - MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); break; } - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + *rule_p = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); @@ -203,27 +204,21 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; + return -ENOMEM; } if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) mlx5e_vport_context_update_vlans(priv); - err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, - match_value); + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); -add_vlan_rule_out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -598,32 +593,27 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u8 proto) { struct mlx5_flow_rule *rule; - u8 match_criteria_enable = 0; - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto out; + return ERR_PTR(-ENOMEM); } if (proto) { - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, proto); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } if (etype) { - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, etype); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rule(ft, match_criteria_enable, - match_criteria, match_value, + rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, dest); @@ -631,9 +621,8 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); } -out: - kvfree(match_criteria); - kvfree(match_value); + + kvfree(spec); return err ? ERR_PTR(err) : rule; } @@ -655,7 +644,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) if (tt == MLX5E_TT_ANY) dest.tir_num = priv->direct_tir[0].tirn; else - dest.tir_num = priv->indir_tirn[tt]; + dest.tir_num = priv->indir_tir[tt].tirn; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, ttc_rules[tt].proto); @@ -792,24 +781,20 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, { struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; u8 *mc_dmac; u8 *mv_dmac; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_l2_rule_out; + return -ENOMEM; } - mc_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); - mv_dmac = MLX5_ADDR_OF(fte_match_param, match_value, + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -817,13 +802,13 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, switch (type) { case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; eth_broadcast_addr(mc_dmac); ether_addr_copy(mv_dmac, ai->addr); break; case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; mc_dmac[0] = 0x01; mv_dmac[0] = 0x01; break; @@ -832,8 +817,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, + ai->rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); if (IS_ERR(ai->rule)) { @@ -843,9 +827,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, ai->rule = NULL; } -add_l2_rule_out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -1102,6 +1084,8 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_l2_table; } + mlx5e_ethtool_init_steering(priv); + return 0; err_destroy_l2_table: @@ -1121,4 +1105,5 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 000000000000..d17c24227900 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_rule *rule; + struct mlx5e_ethtool_table *eth_ft; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = &priv->fs.ethtool.l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if (eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + ft = mlx5_create_auto_grouped_flow_table(ns, prio, + table_size, + MLX5E_ETHTOOL_NUM_GROUPS, 0); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4dst_m)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_tcpip4_spec *l4_val; + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_usrip4_spec *l3_val; + struct ethhdr *eth_val; + struct ethhdr *eth_mask; + + switch (flow_type) { + case TCP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + break; + case UDP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + l3_val = &fs->h_u.usr_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l3_mask->ip4src, + l3_val->ip4src, l3_mask->ip4dst, l3_val->ip4dst); + break; + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, smac_47_16), + eth_mask->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, smac_47_16), + eth_val->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + eth_mask->h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + eth_val->h_dest); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, + ntohs(eth_mask->h_proto)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, + ntohs(eth_val->h_proto)); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, 0xfff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(fs->h_ext.vlan_tci)); + } + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + fs->m_ext.h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_rule *iter; + struct list_head *head = &priv->fs.ethtool.rules; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + priv->fs.ethtool.tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_spec *spec; + struct mlx5_flow_rule *rule; + int err = 0; + u32 action; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + rule = mlx5_add_flow_rule(ft, spec, action, + MLX5_FS_DEFAULT_FLOW_TAG, dst); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule) +{ + if (eth_rule->rule) + mlx5_del_flow_rule(eth_rule->rule); + list_del(ð_rule->list); + priv->fs.ethtool.tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_usrip4_spec *l3_mask; + struct ethhdr *eth_mask; + int num_tuples = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + if (fs->ring_cookie >= priv->params.num_channels && + fs->ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + if (!is_zero_ether_addr(eth_mask->h_dest)) + num_tuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + num_tuples++; + if (eth_mask->h_proto) + num_tuples++; + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (fs->m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &fs->m_u.tcp_ip4_spec; + if (l4_mask->ip4src) { + if (!all_ones(l4_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->ip4dst) { + if (!all_ones(l4_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->psrc) { + if (!all_ones(l4_mask->psrc)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->pdst) { + if (!all_ones(l4_mask->pdst)) + return -EINVAL; + num_tuples++; + } + /* Flow is TCP/UDP */ + num_tuples++; + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) { + if (!all_ones(l3_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l3_mask->ip4dst) { + if (!all_ones(l3_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + /* Flow is IPv4 */ + num_tuples++; + break; + default: + return -EINVAL; + } + if ((fs->flow_type & FLOW_EXT)) { + if (fs->m_ext.vlan_etype || + (fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))) + return -EINVAL; + + if (fs->m_ext.vlan_tci) { + if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) + return -EINVAL; + } + num_tuples++; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_rule *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid\n", __func__); + return -EINVAL; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + if (!eth_ft->ft) { + err = -EINVAL; + goto del_ethtool_rule; + } + rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv, eth_rule); + + return err; +} + +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv, eth_rule); +out: + return err; +} + +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { + if (eth_rule->flow_spec.location == location) { + info->fs = eth_rule->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) +{ + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list) + del_ethtool_rule(priv, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) +{ + INIT_LIST_HEAD(&priv->fs.ethtool.rules); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7a0dca29c642..870bea37c57c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -47,14 +47,16 @@ enum { }; struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + bool am_enabled; }; struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16 max_inline; + u8 min_inline_mode; bool icosq; }; @@ -62,6 +64,7 @@ struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; u16 eq_ix; + u8 cq_period_mode; }; struct mlx5e_channel_param { @@ -254,14 +257,14 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_update_sw_counters(priv); } -static void mlx5e_update_stats_work(struct work_struct *work) +void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, update_stats_work); mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -367,6 +370,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wqe->data.byte_count = cpu_to_be32(byte_count); } + INIT_WORK(&rq->am.work, mlx5e_rx_am_work); + rq->am.mode = priv->params.rx_cq_period_mode; + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; @@ -539,6 +545,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_disable_rq; + if (param->am_enabled) + set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; @@ -574,6 +583,8 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ napi_synchronize(&rq->channel->napi); + cancel_work_sync(&rq->am.work); + mlx5e_disable_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_destroy_rq(rq); @@ -639,6 +650,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, } sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; + sq->min_inline_mode = + MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ? + param->min_inline_mode : 0; err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); if (err) @@ -721,6 +735,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); @@ -741,7 +756,8 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, + int next_state, bool update_rl, int rl_index) { struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; @@ -761,6 +777,10 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); + if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); @@ -776,6 +796,8 @@ static void mlx5e_disable_sq(struct mlx5e_sq *sq) struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); } static int mlx5e_open_sq(struct mlx5e_channel *c, @@ -793,7 +815,8 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_destroy_sq; - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + false, 0); if (err) goto err_disable_sq; @@ -836,7 +859,7 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) mlx5e_send_nop(sq, true); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_ERR); + MLX5_SQC_STATE_ERR, false, 0); if (err) set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); } @@ -891,7 +914,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -938,6 +961,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - @@ -967,8 +991,7 @@ static void mlx5e_disable_cq(struct mlx5e_cq *cq) static int mlx5e_open_cq(struct mlx5e_channel *c, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, - u16 moderation_usecs, - u16 moderation_frames) + struct mlx5e_cq_moder moderation) { int err; struct mlx5e_priv *priv = c->priv; @@ -984,8 +1007,8 @@ static int mlx5e_open_cq(struct mlx5e_channel *c, if (MLX5_CAP_GEN(mdev, cq_moderation)) mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation_usecs, - moderation_frames); + moderation.usec, + moderation.pkts); return 0; err_destroy_cq: @@ -1014,8 +1037,7 @@ static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, for (tc = 0; tc < c->num_tc; tc++) { err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation_usec, - priv->params.tx_cq_moderation_pkts); + priv->params.tx_cq_moderation); if (err) goto err_close_tx_cqs; } @@ -1070,19 +1092,96 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_sq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, sq->rate_limit); + + sq->rate_limit = 0; + + if (rate) { + err = mlx5_rl_add_rate(mdev, rate, &rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RDY, true, rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, rate); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; struct net_device *netdev = priv->netdev; + struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; + struct mlx5e_sq *sq; int err; + int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) @@ -1093,14 +1192,19 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; + if (priv->params.rx_am_enabled) + rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); + else + rx_cq_profile = priv->params.rx_cq_moderation; + mlx5e_build_channeltc_to_txq_map(priv, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); if (err) goto err_napi_del; @@ -1109,8 +1213,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, goto err_close_icosq_cq; err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - priv->params.rx_cq_moderation_usec, - priv->params.rx_cq_moderation_pkts); + rx_cq_profile); if (err) goto err_close_tx_cqs; @@ -1124,6 +1227,16 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; + for (i = 0; i < priv->params.num_tc; i++) { + u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; + + if (priv->tx_rates[txq_ix]) { + sq = priv->txq_to_sq_map[txq_ix]; + mlx5e_set_sq_maxrate(priv->netdev, sq, + priv->tx_rates[txq_ix]); + } + } + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; @@ -1195,11 +1308,13 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; + + param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -1218,7 +1333,7 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); } @@ -1233,6 +1348,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); param->max_inline = priv->params.tx_max_inline; + param->min_inline_mode = priv->params.tx_min_inline_mode; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1240,7 +1356,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -1265,6 +1381,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, } mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -1275,6 +1393,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; } static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, @@ -1286,6 +1406,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, @@ -1348,6 +1470,11 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) goto err_close_channels; } + /* FIXME: This is a W/A for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_start_all_queues(priv->netdev); + kfree(cparam); return 0; @@ -1367,6 +1494,12 @@ static void mlx5e_close_channels(struct mlx5e_priv *priv) { int i; + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(priv->channel[i]); @@ -1421,7 +1554,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -1444,34 +1578,36 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) else mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); + err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); + if (!err) + rqt->enabled = true; kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { - mlx5_core_destroy_rqt(priv->mdev, rqtn); + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_rqts(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); - u32 *rqtn; + struct mlx5e_rqt *rqt = &priv->indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt; int err; int ix; - /* Indirect RQT */ - rqtn = &priv->indir_rqtn; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); - if (err) - return err; - - /* Direct RQTs */ - for (ix = 0; ix < nch; ix++) { - rqtn = &priv->direct_tir[ix].rqtn; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + rqt = &priv->direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) goto err_destroy_rqts; } @@ -1480,24 +1616,11 @@ static int mlx5e_create_rqts(struct mlx5e_priv *priv) err_destroy_rqts: for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn); - - mlx5e_destroy_rqt(priv, priv->indir_rqtn); + mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) -{ - int nch = mlx5e_get_max_num_channels(priv->mdev); - int i; - - for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn); - - mlx5e_destroy_rqt(priv, priv->indir_rqtn); -} - int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1533,10 +1656,15 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) u32 rqtn; int ix; - rqtn = priv->indir_rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + if (priv->indir_rqt.enabled) { + rqtn = priv->indir_rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + } + for (ix = 0; ix < priv->params.num_channels; ix++) { - rqtn = priv->direct_tir[ix].rqtn; + if (!priv->direct_tir[ix].rqt.enabled) + continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, ix); } } @@ -1596,13 +1724,13 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in, + err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); if (err) goto free_in; } - for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in, inlen); if (err) @@ -1615,40 +1743,6 @@ free_in: return err; } -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -{ - void *in; - int inlen; - int err; - int i; - - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { - err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in, - inlen); - if (err) - return err; - } - - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5_core_modify_tir(priv->mdev, - priv->direct_tir[i].tirn, in, - inlen); - if (err) - return err; - } - - kvfree(in); - - return 0; -} - static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1720,6 +1814,7 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; int num_txqs; int err; @@ -1742,7 +1837,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } - err = mlx5e_refresh_tirs_self_loopback_enable(priv); + err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev); if (err) { netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", __func__, err); @@ -1755,9 +1850,14 @@ int mlx5e_open_locked(struct net_device *netdev) #ifdef CONFIG_RFS_ACCEL priv->netdev->rx_cpu_rmap = priv->mdev->rmap; #endif + if (priv->profile->update_stats) + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + goto err_close_channels; + } return 0; err_close_channels: @@ -1767,7 +1867,7 @@ err_clear_state_opened_flag: return err; } -static int mlx5e_open(struct net_device *netdev) +int mlx5e_open(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1782,6 +1882,7 @@ static int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -1791,6 +1892,9 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_remove_sqs_fwd_rules(priv); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_redirect_rqts(priv); @@ -1799,7 +1903,7 @@ int mlx5e_close_locked(struct net_device *netdev) return 0; } -static int mlx5e_close(struct net_device *netdev) +int mlx5e_close(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1858,7 +1962,7 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -1924,7 +2028,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, transport_domain, priv->tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } @@ -1934,12 +2038,12 @@ static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } -static int mlx5e_create_tises(struct mlx5e_priv *priv) +int mlx5e_create_tises(struct mlx5e_priv *priv) { int err; int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { err = mlx5e_create_tis(priv, tc); if (err) goto err_close_tises; @@ -1954,11 +2058,11 @@ err_close_tises: return err; } -static void mlx5e_destroy_tises(struct mlx5e_priv *priv) +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) + for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv, tc); } @@ -1967,7 +2071,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -1984,7 +2088,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { @@ -2074,7 +2178,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, u32 rqtn) { - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); mlx5e_build_tir_ctx_lro(tirc, priv); @@ -2083,15 +2187,13 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); + struct mlx5e_tir *tir; void *tirc; int inlen; - u32 *tirn; int err; u32 *in; - int ix; int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); @@ -2099,25 +2201,51 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - /* indirect tirs */ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(in, 0, inlen); - tirn = &priv->indir_tirn[tt]; + tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tirc, tt); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; } - /* direct tirs */ + kvfree(in); + + return 0; + +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); + + kvfree(in); + + return err; +} + +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + struct mlx5e_tir *tir; + void *tirc; + int inlen; + int err; + u32 *in; + int ix; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + for (ix = 0; ix < nch; ix++) { memset(in, 0, inlen); - tirn = &priv->direct_tir[ix].tirn; + tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqtn); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + priv->direct_tir[ix].rqt.rqtn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; } @@ -2128,27 +2256,28 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) err_destroy_ch_tirs: for (ix--; ix >= 0; ix--) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn); - -err_destroy_tirs: - for (tt--; tt >= 0; tt--) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]); + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); kvfree(in); return err; } -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) +static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); int i; - for (i = 0; i < nch; i++) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn); - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]); + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); +} + +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); } int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) @@ -2222,7 +2351,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2574,25 +2703,31 @@ static int mlx5e_get_vf_stats(struct net_device *dev, } static void mlx5e_add_vxlan_port(struct net_device *netdev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + if (!mlx5e_vxlan_allowed(priv->mdev)) return; - mlx5e_vxlan_queue_work(priv, sa_family, be16_to_cpu(port), 1); + mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } static void mlx5e_del_vxlan_port(struct net_device *netdev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + if (!mlx5e_vxlan_allowed(priv->mdev)) return; - mlx5e_vxlan_queue_work(priv, sa_family, be16_to_cpu(port), 0); + mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, @@ -2656,7 +2791,7 @@ static void mlx5e_tx_timeout(struct net_device *dev) for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; sched_work = true; set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); @@ -2682,6 +2817,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -2702,8 +2838,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, - .ndo_add_vxlan_port = mlx5e_add_vxlan_port, - .ndo_del_vxlan_port = mlx5e_del_vxlan_port, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, @@ -2833,13 +2970,48 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } -static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - int num_channels) +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + params->rx_cq_period_mode = cq_period_mode; + + params->rx_cq_moderation.pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + params->rx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + params->rx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; +} + +static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5E_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5E_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, + min_inline_mode); + break; + case MLX5_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} + +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; u32 pci_bw = 0; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; @@ -2885,15 +3057,16 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); - priv->params.rx_cq_moderation_usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - priv->params.rx_cq_moderation_pkts = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - priv->params.tx_cq_moderation_usec = + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation_pkts = + priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + mlx5e_query_min_inline(mdev, &priv->params.tx_min_inline_mode); priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; @@ -2901,14 +3074,20 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, sizeof(priv->params.toeplitz_hash_key)); mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, num_channels); + MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + /* Initialize pflags */ + MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, + priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + priv->mdev = mdev; priv->netdev = netdev; - priv->params.num_channels = num_channels; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -2934,7 +3113,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -static void mlx5e_build_netdev(struct net_device *netdev) +static const struct switchdev_ops mlx5e_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3015,31 +3198,11 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); -} -static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mkey *mkey) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_create_mkey_mbox_in *in; - int err; - - in = mlx5_vzalloc(sizeof(*in)); - if (!in) - return -ENOMEM; - - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, - NULL); - - kvfree(in); - - return err; +#ifdef CONFIG_NET_SWITCHDEV + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; +#endif } static void mlx5e_create_q_counter(struct mlx5e_priv *priv) @@ -3069,7 +3232,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) struct mlx5_mkey_seg *mkc; int inlen = sizeof(*in); u64 npages = - mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; int err; in = mlx5_vzalloc(inlen); @@ -3084,7 +3247,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) MLX5_ACCESS_MODE_MTT; mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); mkc->len = cpu_to_be64(npages << PAGE_SHIFT); mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); mkc->log2_page_size = PAGE_SHIFT; @@ -3097,160 +3260,233 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) return err; } -static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +static void mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { - struct net_device *netdev; - struct mlx5e_priv *priv; - int nch = mlx5e_get_max_num_channels(mdev); - int err; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; + struct mlx5e_priv *priv = netdev_priv(netdev); - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * MLX5E_MAX_NUM_TC, - nch); - if (!netdev) { - mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); - return NULL; - } + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_nic_netdev(netdev); + mlx5e_vxlan_init(priv); +} - mlx5e_build_netdev_priv(mdev, netdev, nch); - mlx5e_build_netdev(netdev); +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - netif_carrier_off(netdev); + mlx5e_vxlan_cleanup(priv); - priv = netdev_priv(netdev); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); +} - priv->wq = create_singlethread_workqueue("mlx5e"); - if (!priv->wq) - goto err_free_netdev; +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + int i; - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); + err = mlx5e_create_indirect_rqts(priv); if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - goto err_destroy_wq; + mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + return err; } - err = mlx5_core_alloc_pd(mdev, &priv->pdn); + err = mlx5e_create_direct_rqts(priv); if (err) { - mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + goto err_destroy_indirect_rqts; } - err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn); + err = mlx5e_create_indirect_tirs(priv); if (err) { - mlx5_core_err(mdev, "alloc td failed, %d\n", err); - goto err_dealloc_pd; + mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_direct_rqts; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); + err = mlx5e_create_direct_tirs(priv); if (err) { - mlx5_core_err(mdev, "create mkey failed, %d\n", err); - goto err_dealloc_transport_domain; + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_indirect_tirs; } - err = mlx5e_create_umr_mkey(priv); + err = mlx5e_create_flow_steering(priv); if (err) { - mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_mkey; + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_direct_tirs; } + err = mlx5e_tc_init(priv); + if (err) + goto err_destroy_flow_steering; + + return 0; + +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->profile->max_nch(mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_tc_cleanup(priv); + mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + err = mlx5e_create_tises(priv); if (err) { - mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_umr_mkey; + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; } - err = mlx5e_open_drop_rq(priv); - if (err) { - mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_destroy_tises; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); +#endif + return 0; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); } - err = mlx5e_create_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create rqts failed, %d\n", err); - goto err_close_drop_rq; + mlx5e_enable_async_events(priv); + queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, &rep); } +} - err = mlx5e_create_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqts; +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_disable_async_events(priv); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_stats = mlx5e_update_stats, + .max_nch = mlx5e_get_max_num_channels, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int nch = profile->max_nch(mdev); + int err; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + nch * profile->max_tc, + nch); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; } - err = mlx5e_create_flow_steering(priv); + profile->init(mdev, netdev, profile, ppriv); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_free_netdev; + + err = mlx5e_create_umr_mkey(priv); if (err) { - mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_tirs; + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_wq; } - mlx5e_create_q_counter(priv); - - mlx5e_init_l2_addr(priv); + err = profile->init_tx(priv); + if (err) + goto err_destroy_umr_mkey; - mlx5e_vxlan_init(priv); + err = mlx5e_open_drop_rq(priv); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_cleanup_tx; + } - err = mlx5e_tc_init(priv); + err = profile->init_rx(priv); if (err) - goto err_dealloc_q_counters; + goto err_close_drop_rq; -#ifdef CONFIG_MLX5_CORE_EN_DCB - mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); -#endif + mlx5e_create_q_counter(priv); + + mlx5e_init_l2_addr(priv); err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_tc_cleanup; - } - - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - vxlan_get_rx_port(netdev); - rtnl_unlock(); + goto err_dealloc_q_counters; } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); + if (profile->enable) + profile->enable(priv); return priv; -err_tc_cleanup: - mlx5e_tc_cleanup(priv); - err_dealloc_q_counters: mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - -err_destroy_tirs: - mlx5e_destroy_tirs(priv); - -err_destroy_rqts: - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); -err_destroy_tises: - mlx5e_destroy_tises(priv); +err_cleanup_tx: + profile->cleanup_tx(priv); err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mkey); - -err_dealloc_transport_domain: - mlx5_core_dealloc_transport_domain(mdev, priv->tdn); - -err_dealloc_pd: - mlx5_core_dealloc_pd(mdev, priv->pdn); - -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &priv->cq_uar); - err_destroy_wq: destroy_workqueue(priv->wq); @@ -3260,15 +3496,59 @@ err_free_netdev: return NULL; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) { - struct mlx5e_priv *priv = vpriv; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + mlx5_eswitch_register_vport_rep(esw, &rep); + } +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + void *ppriv = NULL; + void *ret; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + if (mlx5e_create_mdev_resources(mdev)) + return NULL; + + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!ret) { + mlx5e_destroy_mdev_resources(mdev); + return NULL; + } + return ret; +} + +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); - mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); @@ -3277,26 +3557,35 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) unregister_netdev(netdev); } - mlx5e_tc_cleanup(priv); - mlx5e_vxlan_cleanup(priv); mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); - mlx5e_destroy_tises(priv); + profile->cleanup_tx(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); - mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); - mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); - mlx5_core_dealloc_pd(priv->mdev, priv->pdn); - mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); cancel_delayed_work_sync(&priv->update_stats_work); destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) free_netdev(netdev); } +static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + struct mlx5e_priv *priv = vpriv; + int vport; + + mlx5e_destroy_netdev(mdev, priv); + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_get_netdev(void *vpriv) { struct mlx5e_priv *priv = vpriv; @@ -3305,8 +3594,8 @@ static void *mlx5e_get_netdev(void *vpriv) } static struct mlx5_interface mlx5e_interface = { - .add = mlx5e_create_netdev, - .remove = mlx5e_destroy_netdev, + .add = mlx5e_add, + .remove = mlx5e_remove, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, @@ -3314,6 +3603,7 @@ static struct mlx5_interface mlx5e_interface = { void mlx5e_init(void) { + mlx5e_build_ptys2ethtool_map(); mlx5_register_interface(&mlx5e_interface); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000000..1c7d8b8314bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <generated/utsrelease.h> +#include <linux/mlx5/fs.h> +#include <net/switchdev.h> +#include <net/pkt_cls.h> + +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + strcpy(data + (i * ETH_GSTRING_LEN), + sw_rep_stats_desc[i].format); + break; + } +} + +static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->params.num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + } + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_sw_rep_counters(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_REP_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, +}; + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u8 mac[ETH_ALEN]; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac); + attr->u.ppid.id_len = ETH_ALEN; + memcpy(&attr->u.ppid.id, &mac, ETH_ALEN); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) + +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_channel *c; + int n, tc, err, num_sqs = 0; + u16 *sqs; + + sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + if (!sqs) + return -ENOMEM; + + for (n = 0; n < priv->params.num_channels; n++) { + c = priv->channel[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + } + + err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); + + kfree(sqs); + return err; +} + +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + return mlx5e_add_sqs_fwd_rules(priv); + return 0; +} + +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); +} + +static int mlx5e_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + int ret; + + ret = snprintf(buf, len, "%d", rep->vport - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -EOPNOTSUPP; + + switch (tc->type) { + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); + } + default: + return -EOPNOTSUPP; + } +} + +static const struct switchdev_ops mlx5e_rep_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, + .ndo_get_stats64 = mlx5e_get_stats, +}; + +static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + priv->params.log_sq_size = + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + priv->params.num_tc = 1; + + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev) +{ + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + +#ifdef CONFIG_NET_SWITCHDEV + netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; +#endif + + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; + + eth_hw_addr_random(netdev); +} + +static void mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_rep_netdev(netdev); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_flow_rule *flow_rule; + int err; + int i; + + err = mlx5e_create_direct_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + return err; + } + + err = mlx5e_create_direct_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_direct_rqts; + } + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + priv->direct_tir[0].tirn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_destroy_direct_tirs; + } + rep->vport_rx_rule = flow_rule; + + err = mlx5e_tc_init(priv); + if (err) + goto err_del_flow_rule; + + return 0; + +err_del_flow_rule: + mlx5_del_flow_rule(rep->vport_rx_rule); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = priv->ppriv; + int i; + + mlx5e_tc_cleanup(priv); + mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5e_destroy_direct_tirs(priv); + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + return 0; +} + +static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) +{ +#define MLX5E_PORT_REPRESENTOR_NCH 1 + return MLX5E_PORT_REPRESENTOR_NCH; +} + +static struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .update_stats = mlx5e_update_sw_rep_counters, + .max_nch = mlx5e_get_rep_max_num_channels, + .max_tc = 1, +}; + +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!rep->priv_data) { + pr_warn("Failed to create representor for vport %d\n", + rep->vport); + return -EINVAL; + } + return 0; +} + +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + mlx5e_destroy_netdev(esw->dev, priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c new file mode 100644 index 000000000000..1fffe48a93cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* Adaptive moderation profiles */ +#define MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define MLX5E_RX_AM_DEF_PROFILE_CQE 1 +#define MLX5E_RX_AM_DEF_PROFILE_EQE 1 +#define MLX5E_PARAMS_AM_NUM_PROFILES 5 + +/* All profiles sizes must be MLX5E_PARAMS_AM_NUM_PROFILES */ +#define MLX5_AM_EQE_PROFILES { \ + {1, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +} + +#define MLX5_AM_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +static const struct mlx5e_cq_moder +profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = { + MLX5_AM_EQE_PROFILES, + MLX5_AM_CQE_PROFILES, +}; + +static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix) +{ + return profile[cq_period_mode][ix]; +} + +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) +{ + int default_profile_ix; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_CQE; + else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */ + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE; + + return profile[rx_cq_period_mode][default_profile_ix]; +} + +/* Adaptive moderation logic */ +enum { + MLX5E_AM_START_MEASURE, + MLX5E_AM_MEASURE_IN_PROGRESS, + MLX5E_AM_APPLY_NEW_PROFILE, +}; + +enum { + MLX5E_AM_PARKING_ON_TOP, + MLX5E_AM_PARKING_TIRED, + MLX5E_AM_GOING_RIGHT, + MLX5E_AM_GOING_LEFT, +}; + +enum { + MLX5E_AM_STATS_WORSE, + MLX5E_AM_STATS_SAME, + MLX5E_AM_STATS_BETTER, +}; + +enum { + MLX5E_AM_STEPPED, + MLX5E_AM_TOO_TIRED, + MLX5E_AM_ON_EDGE, +}; + +static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); + return true; + case MLX5E_AM_GOING_RIGHT: + return (am->steps_left > 1) && (am->steps_right == 1); + default: /* MLX5E_AM_GOING_LEFT */ + return (am->steps_right > 1) && (am->steps_left == 1); + } +} + +static void mlx5e_am_turn(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + am->tune_state = MLX5E_AM_GOING_LEFT; + am->steps_left = 0; + break; + case MLX5E_AM_GOING_LEFT: + am->tune_state = MLX5E_AM_GOING_RIGHT; + am->steps_right = 0; + break; + } +} + +static int mlx5e_am_step(struct mlx5e_rx_am *am) +{ + if (am->tired == (MLX5E_PARAMS_AM_NUM_PROFILES * 2)) + return MLX5E_AM_TOO_TIRED; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) + return MLX5E_AM_ON_EDGE; + am->profile_ix++; + am->steps_right++; + break; + case MLX5E_AM_GOING_LEFT: + if (am->profile_ix == 0) + return MLX5E_AM_ON_EDGE; + am->profile_ix--; + am->steps_left++; + break; + } + + am->tired++; + return MLX5E_AM_STEPPED; +} + +static void mlx5e_am_park_on_top(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tired = 0; + am->tune_state = MLX5E_AM_PARKING_ON_TOP; +} + +static void mlx5e_am_park_tired(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tune_state = MLX5E_AM_PARKING_TIRED; +} + +static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am) +{ + am->tune_state = am->profile_ix ? MLX5E_AM_GOING_LEFT : + MLX5E_AM_GOING_RIGHT; + mlx5e_am_step(am); +} + +static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr, + struct mlx5e_rx_am_stats *prev) +{ + int diff; + + if (!prev->ppms) + return curr->ppms ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_SAME; + + diff = curr->ppms - prev->ppms; + if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */ + return (diff > 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + if (!prev->epms) + return curr->epms ? MLX5E_AM_STATS_WORSE : + MLX5E_AM_STATS_SAME; + + diff = curr->epms - prev->epms; + if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */ + return (diff < 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + return MLX5E_AM_STATS_SAME; +} + +static bool mlx5e_am_decision(struct mlx5e_rx_am_stats *curr_stats, + struct mlx5e_rx_am *am) +{ + int prev_state = am->tune_state; + int prev_ix = am->profile_ix; + int stats_res; + int step_res; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_SAME) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_PARKING_TIRED: + am->tired--; + if (!am->tired) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_GOING_RIGHT: + case MLX5E_AM_GOING_LEFT: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_BETTER) + mlx5e_am_turn(am); + + if (mlx5e_am_on_top(am)) { + mlx5e_am_park_on_top(am); + break; + } + + step_res = mlx5e_am_step(am); + switch (step_res) { + case MLX5E_AM_ON_EDGE: + mlx5e_am_park_on_top(am); + break; + case MLX5E_AM_TOO_TIRED: + mlx5e_am_park_tired(am); + break; + } + + break; + } + + if ((prev_state != MLX5E_AM_PARKING_ON_TOP) || + (am->tune_state != MLX5E_AM_PARKING_ON_TOP)) + am->prev_stats = *curr_stats; + + return am->profile_ix != prev_ix; +} + +static void mlx5e_am_sample(struct mlx5e_rq *rq, + struct mlx5e_rx_am_sample *s) +{ + s->time = ktime_get(); + s->pkt_ctr = rq->stats.packets; + s->event_ctr = rq->cq.event_ctr; +} + +#define MLX5E_AM_NEVENTS 64 + +static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, + struct mlx5e_rx_am_sample *end, + struct mlx5e_rx_am_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + unsigned int npkts = end->pkt_ctr - start->pkt_ctr; + + if (!delta_us) { + WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + return; + } + + curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; + curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; +} + +void mlx5e_rx_am_work(struct work_struct *work) +{ + struct mlx5e_rx_am *am = container_of(work, struct mlx5e_rx_am, + work); + struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); + struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; + + mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + cur_profile.usec, cur_profile.pkts); + + am->state = MLX5E_AM_START_MEASURE; +} + +void mlx5e_rx_am(struct mlx5e_rq *rq) +{ + struct mlx5e_rx_am *am = &rq->am; + struct mlx5e_rx_am_sample end_sample; + struct mlx5e_rx_am_stats curr_stats; + u16 nevents; + + switch (am->state) { + case MLX5E_AM_MEASURE_IN_PROGRESS: + nevents = rq->cq.event_ctr - am->start_sample.event_ctr; + if (nevents < MLX5E_AM_NEVENTS) + break; + mlx5e_am_sample(rq, &end_sample); + mlx5e_am_calc_stats(&am->start_sample, &end_sample, + &curr_stats); + if (mlx5e_am_decision(&curr_stats, am)) { + am->state = MLX5E_AM_APPLY_NEW_PROFILE; + schedule_work(&am->work); + break; + } + /* fall through */ + case MLX5E_AM_START_MEASURE: + mlx5e_am_sample(rq, &am->start_sample); + am->state = MLX5E_AM_MEASURE_IN_PROGRESS; + break; + case MLX5E_AM_APPLY_NEW_PROFILE: + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index fcd490cc5610..7b9d8a989b52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -151,6 +151,22 @@ static const struct counter_desc vport_stats_desc[] = { VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, { "tx_vport_broadcast_bytes", VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, }; #define PPORT_802_3_OFF(c) \ @@ -238,11 +254,12 @@ static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { }; static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; struct mlx5e_rq_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 704c3d30493e..0f19b01e3fff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -37,8 +37,11 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> +#include <net/switchdev.h> +#include <net/tc_act/tc_mirred.h> #include "en.h" #include "en_tc.h" +#include "eswitch.h" struct mlx5e_tc_flow { struct rhash_head node; @@ -49,9 +52,9 @@ struct mlx5e_tc_flow { #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, - u32 action, u32 flow_tag) +static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; @@ -62,7 +65,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else { + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -88,8 +91,8 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, table_created = true; } - rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS, - match_c, match_v, + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rule(priv->fs.tc.t, spec, action, flow_tag, &dest); @@ -109,6 +112,22 @@ err_create_ft: return rule; } +static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 dst_vport) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + u32 src_vport; + + if (rep->vport) /* set source vport for the flow */ + src_vport = rep->vport; + else + src_vport = FDB_UPLINK_VPORT; + + return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule) { @@ -120,18 +139,19 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv)) { + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } } -static int parse_cls_flower(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, +static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); u16 addr_type = 0; u8 ip_proto = 0; @@ -294,8 +314,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return 0; } -static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) +static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *flow_tag) { const struct tc_action *a; @@ -338,17 +358,66 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *dest_vport) +{ + const struct tc_action *a; + + if (tc_no_actions(exts)) + return -EINVAL; + + *action = 0; + + tc_for_each_action(a, exts) { + /* Only support a single action per rule */ + if (*action) + return -EINVAL; + + if (is_tcf_gact_shot(a)) { + *action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_mirred_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch_rep *out_rep; + + out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); + + if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EINVAL; + } + + out_priv = netdev_priv(out_dev); + out_rep = out_priv->ppriv; + if (out_rep->vport == 0) + *dest_vport = FDB_UPLINK_VPORT; + else + *dest_vport = out_rep->vport; + *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + return 0; +} + int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { struct mlx5e_tc_table *tc = &priv->fs.tc; - u32 *match_c; - u32 *match_v; int err = 0; - u32 flow_tag; - u32 action; + u32 flow_tag, action, dest_vport = 0; struct mlx5e_tc_flow *flow; + struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -357,49 +426,53 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, else flow = kzalloc(sizeof(*flow), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_c || !match_v || !flow) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec || !flow) { err = -ENOMEM; goto err_free; } flow->cookie = f->cookie; - err = parse_cls_flower(priv, match_c, match_v, f); + err = parse_cls_flower(priv, spec, f); if (err < 0) goto err_free; - err = parse_tc_actions(priv, f->exts, &action, &flow_tag); - if (err < 0) + if (esw && esw->mode == SRIOV_OFFLOADS) { + err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + } else { + err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + } + + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); goto err_free; + } err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) - goto err_free; - - flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action, - flow_tag); - if (IS_ERR(flow->rule)) { - err = PTR_ERR(flow->rule); - goto err_hash_del; - } + goto err_del_rule; if (old) mlx5e_tc_del_flow(priv, old); goto out; -err_hash_del: - rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); +err_del_rule: + mlx5_del_flow_rule(flow->rule); err_free: if (!old) kfree(flow); out: - kfree(match_c); - kfree(match_v); + kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5740b465ef84..e073bf59890d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -128,6 +128,50 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, return priv->channeltc_to_txq_map[channel_ix][up]; } +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) + return keys.control.thoff; + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) +{ + int hlen; + + switch (mode) { + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + return hlen; + case MLX5_INLINE_MODE_IP: + /* When transport header is set to zero, it means no transport + * header. When transport header is set to 0xff's, it means + * transport header wasn't set. + */ + if (skb_transport_offset(skb)) + return mlx5e_skb_l3_header_offset(skb); + /* fall through */ + case MLX5_INLINE_MODE_L2: + default: + return mlx5e_skb_l2_header_offset(skb); + } +} + static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct sk_buff *skb, bool bf) { @@ -135,8 +179,6 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) - if (bf) { u16 ihs = skb_headlen(skb); @@ -146,8 +188,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, if (ihs <= sq->max_inline) return skb_headlen(skb); } - - return max(skb_network_offset(skb), MLX5E_MIN_INLINE); + return mlx5e_calc_min_inline(sq->min_inline_mode, skb); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index c38781fa567d..64ae2e800daa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -136,6 +136,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); + + if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + mlx5e_rx_am(&c->rq); + mlx5e_cq_arm(&c->rq.cq); mlx5e_cq_arm(&c->icosq.cq); @@ -146,6 +150,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + cq->event_ctr++; set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aebbd6ccb9fe..f6d667797ee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0xFFFF -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...) \ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -92,6 +81,9 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -337,25 +329,23 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_rule *flow_rule = NULL; struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; void *mv_misc = NULL; void *mc_misc = NULL; u8 *dmac_v = NULL; u8 *dmac_c = NULL; - u32 *match_v; - u32 *match_c; if (rx_rule) match_header |= MLX5_MATCH_MISC_PARAMETERS; - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { pr_warn("FDB: Failed to alloc match parameters\n"); - goto out; + return NULL; } - - dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); - dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); if (match_header & MLX5_MATCH_OUTER_HEADERS) { @@ -364,8 +354,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } if (match_header & MLX5_MATCH_MISC_PARAMETERS) { - mv_misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); - mc_misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -376,11 +368,9 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, esw_debug(esw->dev, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; flow_rule = - mlx5_add_flow_rule(esw->fdb_table.fdb, - match_header, - match_c, - match_v, + mlx5_add_flow_rule(esw->fdb_table.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) { @@ -389,9 +379,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); flow_rule = NULL; } -out: - kfree(match_v); - kfree(match_c); + + kvfree(spec); return flow_rule; } @@ -428,7 +417,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -479,7 +468,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; + esw->fdb_table.legacy.addr_grp = g; /* Allmulti group : One rule that forwards any mcast traffic */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -494,7 +483,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; } - esw->fdb_table.allmulti_grp = g; + esw->fdb_table.legacy.allmulti_grp = g; /* Promiscuous group : * One rule that forward all unmatched traffic from previous groups @@ -511,17 +500,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; } - esw->fdb_table.promisc_grp = g; + esw->fdb_table.legacy.promisc_grp = g; out: if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - esw->fdb_table.allmulti_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; } - if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); - esw->fdb_table.addr_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; } if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { mlx5_destroy_flow_table(esw->fdb_table.fdb); @@ -533,20 +522,20 @@ out: return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; - esw->fdb_table.allmulti_grp = NULL; - esw->fdb_table.promisc_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -578,7 +567,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -1300,9 +1290,8 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_spec *spec; u8 smac[ETH_ALEN]; - u32 *match_v; - u32 *match_c; int err = 0; u8 *smac_v; @@ -1336,9 +1325,8 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", vport->vport, vport->vlan, vport->qos); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { err = -ENOMEM; esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", vport->vport, err); @@ -1346,22 +1334,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } if (vport->vlan || vport->qos) - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); if (vport->spoofchk) { - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, - match_v, + spec->match_value, outer_headers.smac_47_16); ether_addr_copy(smac_v, smac); } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->ingress.allow_rule = - mlx5_add_flow_rule(vport->ingress.acl, - MLX5_MATCH_OUTER_HEADERS, - match_c, - match_v, + mlx5_add_flow_rule(vport->ingress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); if (IS_ERR(vport->ingress.allow_rule)) { @@ -1372,13 +1358,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); - memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(spec, 0, sizeof(*spec)); vport->ingress.drop_rule = - mlx5_add_flow_rule(vport->ingress.acl, - 0, - match_c, - match_v, + mlx5_add_flow_rule(vport->ingress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); if (IS_ERR(vport->ingress.drop_rule)) { @@ -1392,17 +1374,14 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, out: if (err) esw_vport_cleanup_ingress_rules(esw, vport); - - kfree(match_v); - kfree(match_c); + kvfree(spec); return err; } static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - u32 *match_v; - u32 *match_c; + struct mlx5_flow_spec *spec; int err = 0; esw_vport_cleanup_egress_rules(esw, vport); @@ -1418,9 +1397,8 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->vlan, vport->qos); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { err = -ENOMEM; esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", vport->vport, err); @@ -1428,16 +1406,14 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid); - MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = - mlx5_add_flow_rule(vport->egress.acl, - MLX5_MATCH_OUTER_HEADERS, - match_c, - match_v, + mlx5_add_flow_rule(vport->egress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); if (IS_ERR(vport->egress.allowed_vlan)) { @@ -1449,13 +1425,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Drop others rule (star rule) */ - memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); - memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(spec, 0, sizeof(*spec)); vport->egress.drop_rule = - mlx5_add_flow_rule(vport->egress.acl, - 0, - match_c, - match_v, + mlx5_add_flow_rule(vport->egress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); if (IS_ERR(vport->egress.drop_rule)) { @@ -1465,8 +1437,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, vport->egress.drop_rule = NULL; } out: - kfree(match_v); - kfree(match_c); + kvfree(spec); return err; } @@ -1540,10 +1511,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1561,16 +1532,20 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); - + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + esw_enable_vport(esw, i, enabled_events); esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -1584,16 +1559,18 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", - esw->enabled_vports); + esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", + esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1601,8 +1578,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rule(mc_promisc->uplink_rule); - esw_destroy_fdb_table(esw); + if (esw->mode == SRIOV_LEGACY) + esw_destroy_legacy_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); + esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } @@ -1660,6 +1641,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->offloads.vport_reps = + kzalloc(total_vports * sizeof(struct mlx5_eswitch_rep), + GFP_KERNEL); + if (!esw->offloads.vport_reps) { + err = -ENOMEM; + goto abort; + } + mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { @@ -1673,6 +1662,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->total_vports = total_vports; esw->enabled_vports = 0; + esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; esw_enable_vport(esw, 0, UC_ADDR_CHANGE); @@ -1683,6 +1673,7 @@ abort: destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->vports); + kfree(esw->offloads.vport_reps); kfree(esw); return err; } @@ -1700,6 +1691,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->mc_promisc); + kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd6800256d4a..c0b05603fc31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> +#include <net/devlink.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -46,6 +47,8 @@ #define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) #define MLX5_L2_ADDR_HASH(addr) (addr[5]) +#define FDB_UPLINK_VPORT 0xffff + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -134,9 +137,49 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; - struct mlx5_flow_group *addr_grp; - struct mlx5_flow_group *allmulti_grp; - struct mlx5_flow_group *promisc_grp; + union { + struct legacy_fdb { + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; + } offloads; + }; +}; + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +struct mlx5_esw_sq { + struct mlx5_flow_rule *send_to_vport_rule; + struct list_head list; +}; + +struct mlx5_eswitch_rep { + int (*load)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + u16 vport; + struct mlx5_flow_rule *vport_rx_rule; + void *priv_data; + struct list_head vport_sqs_list; + bool valid; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_eswitch_rep *vport_reps; }; struct mlx5_eswitch { @@ -153,13 +196,15 @@ struct mlx5_eswitch { */ struct mutex state_lock; struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; + int mode; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]); @@ -177,4 +222,36 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_spec; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num); +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport); + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000000..a357e8eeeed8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +enum { + FDB_FAST_PATH = 0, + FDB_SLOW_PATH +}; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport) +{ + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_rule *rule; + void *misc; + + if (esw->mode != SRIOV_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = dst_vport; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, action, 0, &dest); + + if (IS_ERR(rule)) + mlx5_fc_destroy(esw->dev, counter); + + return rule; +} + +static struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} + +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_sq *esw_sq, *tmp; + + if (esw->mode != SRIOV_OFFLOADS) + return; + + list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { + mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + list_del(&esw_sq->list); + kfree(esw_sq); + } +} + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num) +{ + struct mlx5_flow_rule *flow_rule; + struct mlx5_esw_sq *esw_sq; + int vport; + int err; + int i; + + if (esw->mode != SRIOV_OFFLOADS) + return 0; + + vport = rep->vport == 0 ? + FDB_UPLINK_VPORT : rep->vport; + + for (i = 0; i < sqns_num; i++) { + esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); + if (!esw_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, + vport, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(esw_sq); + goto out_err; + } + esw_sq->send_to_vport_rule = flow_rule; + list_add(&esw_sq->list, &rep->vport_sqs_list); + } + return 0; + +out_err: + mlx5_eswitch_sqs2vport_stop(esw, rep); + return err; +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kvfree(spec); + return err; +} + +#define MAX_PF_SQ 256 +#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ +#define ESW_OFFLOADS_NUM_GROUPS 4 + +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + struct mlx5_flow_group *g; + u32 *flow_group_in; + void *match_criteria; + int table_size, ix, err = 0; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + goto ns_err; + } + + esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + ESW_OFFLOADS_NUM_ENTRIES, + ESW_OFFLOADS_NUM_GROUPS, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); + goto fast_fdb_err; + } + esw->fdb_table.fdb = fdb; + + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.fdb = fdb; + + /* create send-to-vport group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + + ix = nvports + MAX_PF_SQ; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto send_vport_err; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); +slow_fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.fdb); +fast_fdb_err: +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = &esw->dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) + esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + + err = rep->load(esw, rep); + if (err) + goto err_reps; + } + return 0; + +err_reps: + for (vport--; vport >= 0; vport--) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + esw_destroy_vport_rx_group(esw); + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_table(esw); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err) + esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err); + + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_table(esw); +} + +static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = SRIOV_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = SRIOV_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev; + u16 cur_mlx5_mode, mlx5_mode = 0; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + cur_mlx5_mode = dev->priv.eswitch->mode; + + if (cur_mlx5_mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (cur_mlx5_mode == mlx5_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return esw_offloads_start(dev->priv.eswitch); + else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return esw_offloads_stop(dev->priv.eswitch); + else + return -EINVAL; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_core_dev *dev; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *mode = dev->priv.eswitch->mode; + + return 0; +} + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + memcpy(&offloads->vport_reps[rep->vport], rep, + sizeof(struct mlx5_eswitch_rep)); + + INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); + offloads->vport_reps[rep->vport].valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + rep->unload(esw, rep); + + offloads->vport_reps[vport].valid = false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a5bb6b695242..9134010e2921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, return 0; } + +struct mlx5_cmd_fc_bulk { + u16 id; + int num; + int outlen; + u32 out[0]; +}; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +{ + struct mlx5_cmd_fc_bulk *b; + int outlen = sizeof(*b) + + MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * num; + + b = kzalloc(outlen, GFP_KERNEL); + if (!b) + return NULL; + + b->id = id; + b->num = num; + b->outlen = outlen; + + return b; +} + +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +{ + kfree(b); +} + +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +{ + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + b->out, b->outlen); +} + +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes) +{ + int index = id - b->id; + void *stats; + + if (index < 0 || index >= b->num) { + mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", + id, b->id, b->id + b->num - 1); + return; + } + + stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, + flow_statistics[index]); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index fc4f7b83fe0a..158844cef82b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, u64 *packets, u64 *bytes); + +struct mlx5_cmd_fc_bulk; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e912a3d2505e..75bb8c864557 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -67,13 +67,21 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 #define BY_PASS_PRIO_NUM_LEVELS 1 -#define BY_PASS_MIN_LEVEL (KERNEL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ LEFTOVERS_NUM_PRIOS) +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 10 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Vlan, mac, ttc, aRFS */ #define KERNEL_NIC_PRIO_NUM_LEVELS 4 #define KERNEL_NIC_NUM_PRIOS 1 @@ -83,6 +91,11 @@ #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -98,24 +111,24 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 6, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(1, 1), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), @@ -1152,9 +1165,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, static struct mlx5_flow_rule * _mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1168,22 +1179,23 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, - match_criteria_enable, + spec->match_criteria_enable, g->mask.match_criteria, - match_criteria)) { - rule = add_rule_fg(g, match_value, + spec->match_criteria)) { + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } - g = create_autogroup(ft, match_criteria_enable, match_criteria); + g = create_autogroup(ft, spec->match_criteria_enable, + spec->match_criteria); if (IS_ERR(g)) { rule = (void *)g; goto unlock; } - rule = add_rule_fg(g, match_value, + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group @@ -1207,9 +1219,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1240,8 +1250,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, } } - rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, action, flow_tag, dest); + rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!IS_ERR_OR_NULL(rule) && @@ -1359,40 +1368,47 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; int prio; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; - if (!root_ns) + if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: - if (dev->priv.fdb_root_ns) - return &dev->priv.fdb_root_ns->ns; + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (dev->priv.esw_egress_root_ns) - return &dev->priv.esw_egress_root_ns->ns; + if (steering->esw_egress_root_ns) + return &steering->esw_egress_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (dev->priv.esw_ingress_root_ns) - return &dev->priv.esw_ingress_root_ns->ns; + if (steering->esw_ingress_root_ns) + return &steering->esw_ingress_root_ns->ns; else return NULL; default: return NULL; } + root_ns = steering->root_ns; + if (!root_ns) + return NULL; + fs_prio = find_prio(&root_ns->ns, prio); if (!fs_prio) return NULL; @@ -1478,13 +1494,13 @@ static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) return true; } -static int init_root_tree_recursive(struct mlx5_core_dev *dev, +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, int prio) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, flow_table_properties_nic_receive. max_ft_level); struct mlx5_flow_namespace *fs_ns; @@ -1495,7 +1511,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, if (init_node->type == FS_TYPE_PRIO) { if ((init_node->min_ft_level > max_ft_level) || - !has_required_caps(dev, &init_node->caps)) + !has_required_caps(steering->dev, &init_node->caps)) return 0; fs_get_obj(fs_ns, fs_parent_node); @@ -1516,7 +1532,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } prio = 0; for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], + err = init_root_tree_recursive(steering, &init_node->children[i], base, init_node, prio); if (err) return err; @@ -1529,7 +1545,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, return 0; } -static int init_root_tree(struct mlx5_core_dev *dev, +static int init_root_tree(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node) { @@ -1539,7 +1555,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, fs_get_obj(fs_ns, fs_parent_node); for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], + err = init_root_tree_recursive(steering, &init_node->children[i], &fs_ns->node, init_node, i); if (err) @@ -1548,7 +1564,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, return 0; } -static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering, enum fs_flow_table_type table_type) { @@ -1560,7 +1576,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev if (!root_ns) return NULL; - root_ns->dev = dev; + root_ns->dev = steering->dev; root_ns->table_type = table_type; ns = &root_ns->ns; @@ -1615,220 +1631,135 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) #define ANCHOR_PRIO 0 #define ANCHOR_SIZE 1 #define ANCHOR_LEVEL 0 -static int create_anchor_flow_table(struct mlx5_core_dev - *dev) +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; struct mlx5_flow_table *ft; - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); if (IS_ERR(ft)) { - mlx5_core_err(dev, "Failed to create last anchor flow table"); + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); } return 0; } -static int init_root_ns(struct mlx5_core_dev *dev) +static int init_root_ns(struct mlx5_flow_steering *steering) { - dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(dev->priv.root_ns)) + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(steering->root_ns)) goto cleanup; - if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) + if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) goto cleanup; - set_prio_attrs(dev->priv.root_ns); + set_prio_attrs(steering->root_ns); - if (create_anchor_flow_table(dev)) + if (create_anchor_flow_table(steering)) goto cleanup; return 0; cleanup: - mlx5_cleanup_fs(dev); + mlx5_cleanup_fs(steering->dev); return -ENOMEM; } -static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, - struct mlx5_flow_root_namespace *root_ns) +static void clean_tree(struct fs_node *node) { - struct fs_node *prio; - - if (!root_ns) - return; + if (node) { + struct fs_node *iter; + struct fs_node *temp; - if (!list_empty(&root_ns->ns.node.children)) { - prio = list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - if (tree_remove_node(prio)) - mlx5_core_warn(dev, - "Flow steering priority wasn't destroyed, refcount > 1\n"); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_remove_node(node); } - if (tree_remove_node(&root_ns->ns.node)) - mlx5_core_warn(dev, - "Flow steering namespace wasn't destroyed, refcount > 1\n"); - root_ns = NULL; } -static void destroy_flow_tables(struct fs_prio *prio) +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) { - struct mlx5_flow_table *iter; - struct mlx5_flow_table *tmp; - - fs_for_each_ft_safe(iter, tmp, prio) - mlx5_destroy_flow_table(iter); -} - -static void cleanup_root_ns(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; - struct fs_prio *iter_prio; - - if (!MLX5_CAP_GEN(dev, nic_flow_table)) - return; - if (!root_ns) return; - /* stage 1 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - struct fs_node *node; - struct mlx5_flow_namespace *iter_ns; - - fs_for_each_ns_or_ft(node, iter_prio) { - if (node->type == FS_TYPE_FLOW_TABLE) - continue; - fs_get_obj(iter_ns, node); - while (!list_empty(&iter_ns->node.children)) { - struct fs_prio *obj_iter_prio2; - struct fs_node *iter_prio2 = - list_first_entry(&iter_ns->node.children, - struct fs_node, - list); - - fs_get_obj(obj_iter_prio2, iter_prio2); - destroy_flow_tables(obj_iter_prio2); - if (tree_remove_node(iter_prio2)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_iter_prio2->prio); - return; - } - } - } - } - - /* stage 2 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - while (!list_empty(&iter_prio->node.children)) { - struct fs_node *iter_ns = - list_first_entry(&iter_prio->node.children, - struct fs_node, - list); - if (tree_remove_node(iter_ns)) { - mlx5_core_warn(dev, - "Namespace wasn't destroyed, refcount > 1\n"); - return; - } - } - } - - /* stage 3 */ - while (!list_empty(&root_ns->ns.node.children)) { - struct fs_prio *obj_prio_node; - struct fs_node *prio_node = - list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - - fs_get_obj(obj_prio_node, prio_node); - if (tree_remove_node(prio_node)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_prio_node->prio); - return; - } - } - - if (tree_remove_node(&root_ns->ns.node)) { - mlx5_core_warn(dev, - "root namespace wasn't destroyed, refcount > 1\n"); - return; - } - - dev->priv.root_ns = NULL; + clean_tree(&root_ns->ns.node); } void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + struct mlx5_flow_steering *steering = dev->priv.steering; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - cleanup_root_ns(dev); - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); - cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); - cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns); + cleanup_root_ns(steering->root_ns); + cleanup_root_ns(steering->esw_egress_root_ns); + cleanup_root_ns(steering->esw_ingress_root_ns); + cleanup_root_ns(steering->fdb_root_ns); mlx5_cleanup_fc_stats(dev); + kfree(steering); } -static int init_fdb_root_ns(struct mlx5_core_dev *dev) +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); - if (!dev->priv.fdb_root_ns) + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) return -ENOMEM; - /* Create single prio */ - prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); - if (IS_ERR(prio)) { - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); - return PTR_ERR(prio); - } else { - return 0; - } + prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1); + if (IS_ERR(prio)) + goto out_err; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(prio)) + goto out_err; + + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + return PTR_ERR(prio); } -static int init_egress_acl_root_ns(struct mlx5_core_dev *dev) +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL); - if (!dev->priv.esw_egress_root_ns) + steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns) return -ENOMEM; /* create 1 prio*/ - prio = fs_create_prio(&dev->priv.esw_egress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); - if (IS_ERR(prio)) - return PTR_ERR(prio); - else - return 0; + prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); + return PTR_ERR_OR_ZERO(prio); } -static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev) +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL); - if (!dev->priv.esw_ingress_root_ns) + steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns) return -ENOMEM; /* create 1 prio*/ - prio = fs_create_prio(&dev->priv.esw_ingress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); - if (IS_ERR(prio)) - return PTR_ERR(prio); - else - return 0; + prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); + return PTR_ERR_OR_ZERO(prio); } int mlx5_init_fs(struct mlx5_core_dev *dev) { + struct mlx5_flow_steering *steering; int err = 0; if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1838,26 +1769,32 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) if (err) return err; + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) + return -ENOMEM; + steering->dev = dev; + dev->priv.steering = steering; + if (MLX5_CAP_GEN(dev, nic_flow_table) && MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { - err = init_root_ns(dev); + err = init_root_ns(steering); if (err) goto err; } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { - err = init_fdb_root_ns(dev); + err = init_fdb_root_ns(steering); if (err) goto err; } if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acl_root_ns(dev); + err = init_egress_acl_root_ns(steering); if (err) goto err; } if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acl_root_ns(dev); + err = init_ingress_acl_root_ns(steering); if (err) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index aa41a7314691..9cffb6aeb4e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -55,6 +55,14 @@ enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace *esw_egress_root_ns; + struct mlx5_flow_root_namespace *esw_ingress_root_ns; +}; + struct fs_node { struct list_head list; struct list_head children; @@ -103,6 +111,7 @@ struct mlx5_fc_cache { }; struct mlx5_fc { + struct rb_node node; struct list_head list; /* last{packets,bytes} members are used when calculating the delta since diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 164dc37fda72..c2877e9de8a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -32,6 +32,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> +#include <linux/rbtree.h> #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" @@ -68,32 +69,108 @@ * elapsed, the thread will actually query the hardware. */ +static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node); + int result = counter->id - this->id; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&counter->node, parent, new); + rb_insert_color(&counter->node, root); +} + +static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u16 last_id) +{ + struct mlx5_cmd_fc_bulk *b; + struct rb_node *node = NULL; + u16 afirst_id; + int num; + int err; + int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + /* first id must be aligned to 4 when using bulk query */ + afirst_id = first->id & ~0x3; + + /* number of counters to query inc. the last counter */ + num = ALIGN(last_id - afirst_id + 1, 4); + if (num > max_bulk) { + num = max_bulk; + last_id = afirst_id + num - 1; + } + + b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); + if (!b) { + mlx5_core_err(dev, "Error allocating resources for bulk query\n"); + return NULL; + } + + err = mlx5_cmd_fc_bulk_query(dev, b); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + goto out; + } + + for (node = &first->node; node; node = rb_next(node)) { + struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); + struct mlx5_fc_cache *c = &counter->cache; + + if (counter->id > last_id) + break; + + mlx5_cmd_fc_bulk_get(dev, b, + counter->id, &c->packets, &c->bytes); + } + +out: + mlx5_cmd_fc_bulk_free(b); + + return node; +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, priv.fc_stats.work.work); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long now = jiffies; - struct mlx5_fc *counter; - struct mlx5_fc *tmp; - int err = 0; + struct mlx5_fc *counter = NULL; + struct mlx5_fc *last = NULL; + struct rb_node *node; + LIST_HEAD(tmplist); spin_lock(&fc_stats->addlist_lock); - list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); + list_splice_tail_init(&fc_stats->addlist, &tmplist); - if (!list_empty(&fc_stats->list)) + if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); spin_unlock(&fc_stats->addlist_lock); - list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { - struct mlx5_fc_cache *c = &counter->cache; - u64 packets; - u64 bytes; + list_for_each_entry(counter, &tmplist, list) + mlx5_fc_stats_insert(&fc_stats->counters, counter); + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); if (counter->deleted) { - list_del(&counter->list); + rb_erase(&counter->node, &fc_stats->counters); mlx5_cmd_fc_free(dev, counter->id); @@ -101,26 +178,20 @@ static void mlx5_fc_stats_work(struct work_struct *work) continue; } - if (time_before(now, fc_stats->next_query)) - continue; + last = counter; + } - err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes); - if (err) { - pr_err("Error querying stats for counter id %d\n", - counter->id); - continue; - } + if (time_before(now, fc_stats->next_query) || !last) + return; - if (packets == c->packets) - continue; + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); - c->lastuse = jiffies; - c->packets = packets; - c->bytes = bytes; + node = mlx5_fc_stats_query(dev, counter, last->id); } - if (time_after_eq(now, fc_stats->next_query)) - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) @@ -176,7 +247,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - INIT_LIST_HEAD(&fc_stats->list); + fc_stats->counters = RB_ROOT; INIT_LIST_HEAD(&fc_stats->addlist); spin_lock_init(&fc_stats->addlist_lock); @@ -194,20 +265,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; struct mlx5_fc *counter; struct mlx5_fc *tmp; + struct rb_node *node; cancel_delayed_work_sync(&dev->priv.fc_stats.work); destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); - - list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { + list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) { list_del(&counter->list); mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } } void mlx5_fc_query_cached(struct mlx5_fc *counter, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 75c7ae6a5cc4..77fc1aa26114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -151,6 +151,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 96a59463ae65..1a05fb965c8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -272,7 +272,7 @@ static void poll_health(unsigned long data) if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); - queue_work(health->wq, &health->work); + schedule_work(&health->work); } } @@ -301,7 +301,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - destroy_workqueue(health->wq); + flush_work(&health->work); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -316,10 +316,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) strcpy(name, "mlx5_health"); strcat(name, dev_name(&dev->pdev->dev)); - health->wq = create_singlethread_workqueue(name); kfree(name); - if (!health->wq) - return -ENOMEM; INIT_WORK(&health->work, health_care); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6695893ddd2d..4f491d43e77d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -51,6 +51,7 @@ #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif +#include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -1144,6 +1145,13 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) dev_err(&pdev->dev, "Failed to init flow steering\n"); goto err_fs; } + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_rl; + } + #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1183,6 +1191,8 @@ err_sriov: mlx5_eswitch_cleanup(dev->priv.eswitch); #endif err_reg_dev: + mlx5_cleanup_rl_table(dev); +err_rl: mlx5_cleanup_fs(dev); err_fs: mlx5_cleanup_mkey_table(dev); @@ -1253,6 +1263,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_eswitch_cleanup(dev->priv.eswitch); #endif + mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1305,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(&pdev->dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = &dev->priv; priv->pci_dev_data = id->driver_data; @@ -1354,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, &pdev->dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1370,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1380,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 3e35611b19c3..752c08127138 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -202,15 +202,24 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask) +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask) { - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_query_port_autoneg(dev, proto_mask, &an_status, + &an_disable_cap, &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; memset(in, 0, sizeof(in)); MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); if (proto_mask == MLX5_PTYS_EN) MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); @@ -220,7 +229,19 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_proto); +EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); + +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) @@ -518,6 +539,25 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); + int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 000000000000..c07c28bd3d55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. + * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + u32 rate) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + for (i = 0; i < table->max_size; i++) { + if (table->rl_entry[i].rate == rate) + return &table->rl_entry[i]; + if (!empty_found && !table->rl_entry[i].rate) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, + u32 rate, u16 index) +{ + u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]; + u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_RATE_LIMIT); + MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + int err = 0; + + mutex_lock(&table->rl_lock); + + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rate, table->min_rate, table->max_rate); + err = -EINVAL; + goto out; + } + + entry = find_rl_entry(table, rate); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto out; + } + if (entry->refcount) { + /* rate already configured */ + entry->refcount++; + } else { + /* new rate limit */ + err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + if (err) { + mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", + rate, err); + goto out; + } + entry->rate = rate; + entry->refcount = 1; + } + *index = entry->index; + +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + + /* 0 is a reserved value for unlimited rate */ + if (rate == 0) + return; + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rate); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u is not configured\n", rate); + goto out; + } + + entry->refcount--; + if (!entry->refcount) { + /* need to remove rate */ + mlx5_set_rate_limit_cmd(dev, 0, entry->index); + entry->rate = 0; + } + +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + mutex_init(&table->rl_lock); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + /* Index 0 is reserved */ + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].rate) + mlx5_set_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); + + kfree(dev->priv.rl_table.rl_entry); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index d6a3f412ba9f..b380a6bc1f85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -167,7 +167,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_init_vfs(dev, num_vfs); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); #endif return num_vfs; @@ -209,7 +209,8 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) mlx5_core_init_vfs(dev, cur_vfs); #ifdef CONFIG_MLX5_CORE_EN if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, + SRIOV_LEGACY); #endif enable_vfs(dev, cur_vfs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 91846dfcbe9c..21365d06982b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -135,6 +135,18 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); } +void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0}; + + mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out)); + + *min_inline_mode = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.min_wqe_inline_mode); +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); + int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 05de77267d58..e25a73ed2981 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9b5ebf84c051..d20ae1838a64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ - spectrum_switchdev.o + spectrum_switchdev.o spectrum_router.o \ + spectrum_kvdl.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index cd63b8263688..28271bedd957 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -105,6 +105,7 @@ enum mlxsw_cmd_opcode { MLXSW_CMD_OPCODE_SW2HW_EQ = 0x013, MLXSW_CMD_OPCODE_HW2SW_EQ = 0x014, MLXSW_CMD_OPCODE_QUERY_EQ = 0x015, + MLXSW_CMD_OPCODE_QUERY_RESOURCES = 0x101, }; static inline const char *mlxsw_cmd_opcode_str(u16 opcode) @@ -144,6 +145,8 @@ static inline const char *mlxsw_cmd_opcode_str(u16 opcode) return "HW2SW_EQ"; case MLXSW_CMD_OPCODE_QUERY_EQ: return "QUERY_EQ"; + case MLXSW_CMD_OPCODE_QUERY_RESOURCES: + return "QUERY_RESOURCES"; default: return "*UNKNOWN*"; } @@ -500,6 +503,35 @@ static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core) return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_UNMAP_FA, 0, 0); } +/* QUERY_RESOURCES - Query chip resources + * -------------------------------------- + * OpMod == 0 (N/A) , INMmod is index + * ---------------------------------- + * The QUERY_RESOURCES command retrieves information related to chip resources + * by resource ID. Every command returns 32 entries. INmod is being use as base. + * for example, index 1 will return entries 32-63. When the tables end and there + * are no more sources in the table, will return resource id 0xFFF to indicate + * it. + */ +static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core, + char *out_mbox, int index) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_RESOURCES, + 0, index, false, out_mbox, + MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_resource_id + * The resource id. 0xFFFF indicates table's end. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, query_resource, id, 0x00, 16, 16, 0x8, 0, false); + +/* cmd_mbox_query_resource_data + * The resource + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, query_resource, data, + 0x00, 0, 40, 0x8, 0, false); + /* CONFIG_PROFILE (Set) - Configure Switch Profile * ------------------------------ * OpMod == 1 (Set), INMmod == 0 (N/A) @@ -607,6 +639,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile, */ MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); +/* cmd_mbox_config_set_kvd_linear_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1); + +/* cmd_mbox_config_set_kvd_hash_single_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); + +/* cmd_mbox_config_set_kvd_hash_double_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); + /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -733,6 +783,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); */ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); +/* cmd_mbox_config_kvd_linear_size + * KVD Linear Size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24); + +/* cmd_mbox_config_kvd_hash_single_size + * KVD Hash single-entries size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be greater or equal to cap_min_kvd_hash_single_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24); + +/* cmd_mbox_config_kvd_hash_double_size + * KVD Hash double-entries size (units of single-size entries) + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24); + /* cmd_mbox_config_profile_swid_config_mask * Modify Switch Partition Configuration mask. When set, the configu- * ration value for the Switch Partition are taken from the mailbox. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index b0a0b01bb4ef..068ee65a960b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -58,6 +58,7 @@ #include <linux/workqueue.h> #include <asm/byteorder.h> #include <net/devlink.h> +#include <trace/events/devlink.h> #include "core.h" #include "item.h" @@ -110,6 +111,7 @@ struct mlxsw_core { struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ } lag; + struct mlxsw_resources resources; struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ @@ -447,6 +449,10 @@ static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, if (!skb) return -ENOMEM; + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0, + skb->data + mlxsw_core->driver->txhdr_len, + skb->len - mlxsw_core->driver->txhdr_len); + atomic_set(&trans->active, 1); err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); if (err) { @@ -529,6 +535,9 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, struct mlxsw_core *mlxsw_core = priv; struct mlxsw_reg_trans *trans; + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0, + skb->data, skb->len); + if (!mlxsw_emad_is_resp(skb)) goto free_skb; @@ -1102,7 +1111,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); if (err) goto err_bus_init; @@ -1110,14 +1120,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; - err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); - if (err) - goto err_hwmon_init; - err = devlink_register(devlink, mlxsw_bus_info->dev); if (err) goto err_devlink_register; + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); if (err) goto err_driver_init; @@ -1131,9 +1141,9 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core); err_driver_init: +err_hwmon_init: devlink_unregister(devlink); err_devlink_register: -err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); @@ -1644,6 +1654,12 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); +struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core) +{ + return &mlxsw_core->resources; +} +EXPORT_SYMBOL(mlxsw_core_resources_get); + int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, struct mlxsw_core_port *mlxsw_core_port, u8 local_port, struct net_device *dev, bool split, u32 split_group) @@ -1736,7 +1752,7 @@ static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = create_workqueue(mlxsw_core_driver_name); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 436bc49df6ab..d3476ead9982 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_ib_mc:1, used_max_pkey:1, used_ar_sec:1, - used_adaptive_routing_group_cap:1; + used_adaptive_routing_group_cap:1, + used_kvd_sizes:1; u8 max_vepa_channels; u16 max_lag; u16 max_port_per_lag; @@ -211,6 +212,10 @@ struct mlxsw_config_profile { u8 ar_sec; u16 adaptive_routing_group_cap; u8 arn; + u32 kvd_linear_size; + u32 kvd_hash_single_size; + u32 kvd_hash_double_size; + u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -262,10 +267,18 @@ struct mlxsw_driver { const struct mlxsw_config_profile *profile; }; +struct mlxsw_resources { + u8 max_span_valid:1; + u8 max_span; +}; + +struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); + struct mlxsw_bus { const char *kind; int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, - const struct mlxsw_config_profile *profile); + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources); void (*fini)(void *bus_priv); bool (*skb_transmit_busy)(void *bus_priv, const struct mlxsw_tx_info *tx_info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7f4173c8eda3..1d1360c178bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1154,6 +1154,61 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); } +#define MLXSW_RESOURCES_TABLE_END_ID 0xffff +#define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 +#define MLXSW_RESOURCES_PER_QUERY 32 + +static void mlxsw_pci_resources_query_parse(int id, u64 val, + struct mlxsw_resources *resources) +{ + switch (id) { + case MLXSW_MAX_SPAN_ID: + resources->max_span = val; + resources->max_span_valid = 1; + break; + default: + break; + } +} + +static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_resources *resources, + u8 query_enabled) +{ + int index, i; + u64 data; + u16 id; + int err; + + /* Not all the versions support resources query */ + if (!query_enabled) + return 0; + + mlxsw_cmd_mbox_zero(mbox); + + for (index = 0; index < MLXSW_RESOURCES_QUERY_MAX_QUERIES; index++) { + err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index); + if (err) + return err; + + for (i = 0; i < MLXSW_RESOURCES_PER_QUERY; i++) { + id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); + data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); + + if (id == MLXSW_RESOURCES_TABLE_END_ID) + return 0; + + mlxsw_pci_resources_query_parse(id, data, resources); + } + } + + /* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get + * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW. + */ + return -EIO; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, const struct mlxsw_config_profile *profile) { @@ -1255,6 +1310,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } + if (profile->used_kvd_sizes) { + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( + mbox, profile->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( + mbox, profile->kvd_hash_single_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( + mbox, profile->kvd_hash_double_size); + } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, @@ -1390,7 +1459,8 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, } static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { struct mlxsw_pci *mlxsw_pci = bus_priv; struct pci_dev *pdev = mlxsw_pci->pdev; @@ -1449,6 +1519,11 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_boardinfo; + err = mlxsw_pci_resources_query(mlxsw_pci, mbox, resources, + profile->resource_query_enable); + if (err) + goto err_query_resources; + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); if (err) goto err_config_profile; @@ -1471,6 +1546,7 @@ err_request_eq_irq: mlxsw_pci_aqs_fini(mlxsw_pci); err_aqs_init: err_config_profile: +err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1977e7a5c530..7ca9201f7dcb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/reg.h * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -386,7 +387,9 @@ enum mlxsw_reg_sfd_rec_action { /* forward and trap, trap_id is FDB_TRAP */ MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1, /* trap and do not forward, trap_id is FDB_TRAP */ - MLXSW_REG_SFD_REC_ACTION_TRAP = 3, + MLXSW_REG_SFD_REC_ACTION_TRAP = 2, + /* forward to IP router */ + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3, MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15, }; @@ -2500,6 +2503,7 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, + MLXSW_REG_PPCNT_TC_CNT = 0x11, }; /* reg_ppcnt_grp @@ -2700,6 +2704,23 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); */ MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); +/* Ethernet Per Traffic Group Counters */ + +/* reg_ppcnt_tc_transmit_queue + * Contains the transmit queue depth in cells of traffic class + * selected by prio_tc and the port selected by local_port. + * The field cannot be cleared. + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_tc_no_buffer_discard_uc + * The number of unicast packets dropped due to lack of shared + * buffer resources. + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, 0x08 + 0x08, 0, 64); + static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, u8 prio_tc) @@ -2718,7 +2739,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, * Configures the switch priority to buffer table. */ #define MLXSW_REG_PPTB_ID 0x500B -#define MLXSW_REG_PPTB_LEN 0x0C +#define MLXSW_REG_PPTB_LEN 0x10 static const struct mlxsw_reg_info mlxsw_reg_pptb = { .id = MLXSW_REG_PPTB_ID, @@ -2784,6 +2805,13 @@ MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8); */ MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4); +/* reg_pptb_prio_to_buff_msb + * Mapping of switch priority <i+8> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4); + #define MLXSW_REG_PPTB_ALL_PRIO 0xFF static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port) @@ -2792,6 +2820,14 @@ static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port) mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM); mlxsw_reg_pptb_local_port_set(payload, local_port); mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO); + mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO); +} + +static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio, + u8 buff) +{ + mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff); + mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff); } /* PBMC - Port Buffer Management Control Register @@ -3186,6 +3222,1183 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* RGCR - Router General Configuration Register + * -------------------------------------------- + * The register is used for setting up the router configuration. + */ +#define MLXSW_REG_RGCR_ID 0x8001 +#define MLXSW_REG_RGCR_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_rgcr = { + .id = MLXSW_REG_RGCR_ID, + .len = MLXSW_REG_RGCR_LEN, +}; + +/* reg_rgcr_ipv4_en + * IPv4 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1); + +/* reg_rgcr_ipv6_en + * IPv6 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1); + +/* reg_rgcr_max_router_interfaces + * Defines the maximum number of active router interfaces for all virtual + * routers. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16); + +/* reg_rgcr_usp + * Update switch priority and packet color. + * 0 - Preserve the value of Switch Priority and packet color. + * 1 - Recalculate the value of Switch Priority and packet color. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1); + +/* reg_rgcr_pcp_rw + * Indicates how to handle the pcp_rewrite_en value: + * 0 - Preserve the value of pcp_rewrite_en. + * 2 - Disable PCP rewrite. + * 3 - Enable PCP rewrite. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); + +/* reg_rgcr_activity_dis + * Activity disable: + * 0 - Activity will be set when an entry is hit (default). + * 1 - Activity will not be set when an entry is hit. + * + * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry + * (RALUE). + * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host + * Entry (RAUHT). + * Bits 2:7 are reserved. + * Access: RW + * + * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB. + */ +MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); + +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +{ + MLXSW_REG_ZERO(rgcr, payload); + mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); +} + +/* RITR - Router Interface Table Register + * -------------------------------------- + * The register is used to configure the router interface table. + */ +#define MLXSW_REG_RITR_ID 0x8002 +#define MLXSW_REG_RITR_LEN 0x40 + +static const struct mlxsw_reg_info mlxsw_reg_ritr = { + .id = MLXSW_REG_RITR_ID, + .len = MLXSW_REG_RITR_LEN, +}; + +/* reg_ritr_enable + * Enables routing on the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1); + +/* reg_ritr_ipv4 + * IPv4 routing enable. Enables routing of IPv4 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); + +/* reg_ritr_ipv6 + * IPv6 routing enable. Enables routing of IPv6 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); + +enum mlxsw_reg_ritr_if_type { + MLXSW_REG_RITR_VLAN_IF, + MLXSW_REG_RITR_FID_IF, + MLXSW_REG_RITR_SP_IF, +}; + +/* reg_ritr_type + * Router interface type. + * 0 - VLAN interface. + * 1 - FID interface. + * 2 - Sub-port interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); + +enum { + MLXSW_REG_RITR_RIF_CREATE, + MLXSW_REG_RITR_RIF_DEL, +}; + +/* reg_ritr_op + * Opcode: + * 0 - Create or edit RIF. + * 1 - Delete RIF. + * Reserved for SwitchX-2. For Spectrum, editing of interface properties + * is not supported. An interface must be deleted and re-created in order + * to update properties. + * Access: WO + */ +MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2); + +/* reg_ritr_rif + * Router interface index. A pointer to the Router Interface Table. + * Access: Index + */ +MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16); + +/* reg_ritr_ipv4_fe + * IPv4 Forwarding Enable. + * Enables routing of IPv4 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); + +/* reg_ritr_ipv6_fe + * IPv6 Forwarding Enable. + * Enables routing of IPv6 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); + +/* reg_ritr_virtual_router + * Virtual router ID associated with the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16); + +/* reg_ritr_mtu + * Router interface MTU. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16); + +/* reg_ritr_if_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8); + +/* reg_ritr_if_mac + * Router interface MAC address. + * In Spectrum, all MAC addresses must have the same 38 MSBits. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6); + +/* VLAN Interface */ + +/* reg_ritr_vlan_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12); + +/* FID Interface */ + +/* reg_ritr_fid_if_fid + * Filtering ID. Used to connect a bridge to the router. Only FIDs from + * the vFID range are supported. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16); + +static inline void mlxsw_reg_ritr_fid_set(char *payload, + enum mlxsw_reg_ritr_if_type rif_type, + u16 fid) +{ + if (rif_type == MLXSW_REG_RITR_FID_IF) + mlxsw_reg_ritr_fid_if_fid_set(payload, fid); + else + mlxsw_reg_ritr_vlan_if_vid_set(payload, fid); +} + +/* Sub-port Interface */ + +/* reg_ritr_sp_if_lag + * LAG indication. When this bit is set the system_port field holds the + * LAG identifier. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1); + +/* reg_ritr_sp_system_port + * Port unique indentifier. When lag bit is set, this field holds the + * lag_id in bits 0:9. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); + +/* reg_ritr_sp_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); + +static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) +{ + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_rif_set(payload, rif); +} + +static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, + u16 system_port, u16 vid) +{ + mlxsw_reg_ritr_sp_if_lag_set(payload, lag); + mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port); + mlxsw_reg_ritr_sp_if_vid_set(payload, vid); +} + +static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, + enum mlxsw_reg_ritr_if_type type, + u16 rif, u16 mtu, const char *mac) +{ + bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; + + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_enable_set(payload, enable); + mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_type_set(payload, type); + mlxsw_reg_ritr_op_set(payload, op); + mlxsw_reg_ritr_rif_set(payload, rif); + mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_mtu_set(payload, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); +} + +/* RATR - Router Adjacency Table Register + * -------------------------------------- + * The RATR register is used to configure the Router Adjacency (next-hop) + * Table. + */ +#define MLXSW_REG_RATR_ID 0x8008 +#define MLXSW_REG_RATR_LEN 0x2C + +static const struct mlxsw_reg_info mlxsw_reg_ratr = { + .id = MLXSW_REG_RATR_ID, + .len = MLXSW_REG_RATR_LEN, +}; + +enum mlxsw_reg_ratr_op { + /* Read */ + MLXSW_REG_RATR_OP_QUERY_READ = 0, + /* Read and clear activity */ + MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2, + /* Write Adjacency entry */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1, + /* Write Adjacency entry only if the activity is cleared. + * The write may not succeed if the activity is set. There is not + * direct feedback if the write has succeeded or not, however + * the get will reveal the actual entry (SW can compare the get + * response to the set command). + */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3, +}; + +/* reg_ratr_op + * Note that Write operation may also be used for updating + * counter_set_type and counter_index. In this case all other + * fields must not be updated. + * Access: OP + */ +MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4); + +/* reg_ratr_v + * Valid bit. Indicates if the adjacency entry is valid. + * Note: the device may need some time before reusing an invalidated + * entry. During this time the entry can not be reused. It is + * recommended to use another entry before reusing an invalidated + * entry (e.g. software can put it at the end of the list for + * reusing). Trying to access an invalidated entry not yet cleared + * by the device results with failure indicating "Try Again" status. + * When valid is '0' then egress_router_interface,trap_action, + * adjacency_parameters and counters are reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); + +/* reg_ratr_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. To clear the a bit, use "clear activity". + * Access: RO + */ +MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); + +/* reg_ratr_adjacency_index_low + * Bits 15:0 of index into the adjacency table. + * For SwitchX and SwitchX-2, the adjacency table is linear and + * used for adjacency entries only. + * For Spectrum, the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16); + +/* reg_ratr_egress_router_interface + * Range is 0 .. cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16); + +enum mlxsw_reg_ratr_trap_action { + MLXSW_REG_RATR_TRAP_ACTION_NOP, + MLXSW_REG_RATR_TRAP_ACTION_TRAP, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_ratr_trap_action + * see mlxsw_reg_ratr_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); + +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, +}; + +/* reg_ratr_adjacency_index_high + * Bits 23:16 of the adjacency_index. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); + +/* reg_ratr_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); + +/* reg_ratr_eth_destination_mac + * MAC address of the destination next-hop. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); + +static inline void +mlxsw_reg_ratr_pack(char *payload, + enum mlxsw_reg_ratr_op op, bool valid, + u32 adjacency_index, u16 egress_rif) +{ + MLXSW_REG_ZERO(ratr, payload); + mlxsw_reg_ratr_op_set(payload, op); + mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); + mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); + mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); +} + +static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, + const char *dest_mac) +{ + mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); +} + +/* RALTA - Router Algorithmic LPM Tree Allocation Register + * ------------------------------------------------------- + * RALTA is used to allocate the LPM trees of the SHSPM method. + */ +#define MLXSW_REG_RALTA_ID 0x8010 +#define MLXSW_REG_RALTA_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_ralta = { + .id = MLXSW_REG_RALTA_ID, + .len = MLXSW_REG_RALTA_LEN, +}; + +/* reg_ralta_op + * opcode (valid for Write, must be 0 on Read) + * 0 - allocate a tree + * 1 - deallocate a tree + * Access: OP + */ +MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2); + +enum mlxsw_reg_ralxx_protocol { + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_REG_RALXX_PROTOCOL_IPV6, +}; + +/* reg_ralta_protocol + * Protocol. + * Deallocation opcode: Reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4); + +/* reg_ralta_tree_id + * An identifier (numbered from 1..cap_shspm_max_trees-1) representing + * the tree identifier (managed by software). + * Note that tree_id 0 is allocated for a default-route tree. + * Access: Index + */ +MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(ralta, payload); + mlxsw_reg_ralta_op_set(payload, !alloc); + mlxsw_reg_ralta_protocol_set(payload, protocol); + mlxsw_reg_ralta_tree_id_set(payload, tree_id); +} + +/* RALST - Router Algorithmic LPM Structure Tree Register + * ------------------------------------------------------ + * RALST is used to set and query the structure of an LPM tree. + * The structure of the tree must be sorted as a sorted binary tree, while + * each node is a bin that is tagged as the length of the prefixes the lookup + * will refer to. Therefore, bin X refers to a set of entries with prefixes + * of X bits to match with the destination address. The bin 0 indicates + * the default action, when there is no match of any prefix. + */ +#define MLXSW_REG_RALST_ID 0x8011 +#define MLXSW_REG_RALST_LEN 0x104 + +static const struct mlxsw_reg_info mlxsw_reg_ralst = { + .id = MLXSW_REG_RALST_ID, + .len = MLXSW_REG_RALST_LEN, +}; + +/* reg_ralst_root_bin + * The bin number of the root bin. + * 0<root_bin=<(length of IP address) + * For a default-route tree configure 0xff + * Access: RW + */ +MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8); + +/* reg_ralst_tree_id + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * Access: Index + */ +MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8); + +#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff +#define MLXSW_REG_RALST_BIN_OFFSET 0x04 +#define MLXSW_REG_RALST_BIN_COUNT 128 + +/* reg_ralst_left_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false); + +/* reg_ralst_right_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00, + false); + +static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + MLXSW_REG_ZERO(ralst, payload); + + /* Initialize all bins to have no left or right child */ + memset(payload + MLXSW_REG_RALST_BIN_OFFSET, + MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2); + + mlxsw_reg_ralst_root_bin_set(payload, root_bin); + mlxsw_reg_ralst_tree_id_set(payload, tree_id); +} + +static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + int bin_index = bin_number - 1; + + mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin); + mlxsw_reg_ralst_right_child_bin_set(payload, bin_index, + right_child_bin); +} + +/* RALTB - Router Algorithmic LPM Tree Binding Register + * ---------------------------------------------------- + * RALTB is used to bind virtual router and protocol to an allocated LPM tree. + */ +#define MLXSW_REG_RALTB_ID 0x8012 +#define MLXSW_REG_RALTB_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_raltb = { + .id = MLXSW_REG_RALTB_ID, + .len = MLXSW_REG_RALTB_LEN, +}; + +/* reg_raltb_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16); + +/* reg_raltb_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4); + +/* reg_raltb_tree_id + * Tree to be used for the {virtual_router, protocol} + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0. + * Access: RW + */ +MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(raltb, payload); + mlxsw_reg_raltb_virtual_router_set(payload, virtual_router); + mlxsw_reg_raltb_protocol_set(payload, protocol); + mlxsw_reg_raltb_tree_id_set(payload, tree_id); +} + +/* RALUE - Router Algorithmic LPM Unicast Entry Register + * ----------------------------------------------------- + * RALUE is used to configure and query LPM entries that serve + * the Unicast protocols. + */ +#define MLXSW_REG_RALUE_ID 0x8013 +#define MLXSW_REG_RALUE_LEN 0x38 + +static const struct mlxsw_reg_info mlxsw_reg_ralue = { + .id = MLXSW_REG_RALUE_ID, + .len = MLXSW_REG_RALUE_LEN, +}; + +/* reg_ralue_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4); + +enum mlxsw_reg_ralue_op { + /* Read operation. If entry doesn't exist, the operation fails. */ + MLXSW_REG_RALUE_OP_QUERY_READ = 0, + /* Clear on read operation. Used to read entry and + * clear Activity bit. + */ + MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1, + /* Write operation. Used to write a new entry to the table. All RW + * fields are written for new entry. Activity bit is set + * for new entries. + */ + MLXSW_REG_RALUE_OP_WRITE_WRITE = 0, + /* Update operation. Used to update an existing route entry and + * only update the RW fields that are detailed in the field + * op_u_mask. If entry doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1, + /* Clear activity. The Activity bit (the field a) is cleared + * for the entry. + */ + MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2, + /* Delete operation. Used to delete an existing entry. If entry + * doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_DELETE = 3, +}; + +/* reg_ralue_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3); + +/* reg_ralue_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry, only if the entry is a route. To clear the a bit, use + * "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1); + +/* reg_ralue_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16); + +#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE BIT(0) +#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN BIT(1) +#define MLXSW_REG_RALUE_OP_U_MASK_ACTION BIT(2) + +/* reg_ralue_op_u_mask + * opcode update mask. + * On read operation, this field is reserved. + * This field is valid for update opcode, otherwise - reserved. + * This field is a bitmask of the fields that should be updated. + * Access: WO + */ +MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3); + +/* reg_ralue_prefix_len + * Number of bits in the prefix of the LPM route. + * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes + * two entries in the physical HW table. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); + +/* reg_ralue_dip* + * The prefix of the route or of the marker that the object of the LPM + * is compared with. The most significant bits of the dip are the prefix. + * The list significant bits must be '0' if the prefix_len is smaller + * than 128 for IPv6 or smaller than 32 for IPv4. + * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); + +enum mlxsw_reg_ralue_entry_type { + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2, + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3, +}; + +/* reg_ralue_entry_type + * Entry type. + * Note - for Marker entries, the action_type and action fields are reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2); + +/* reg_ralue_bmp_len + * The best match prefix length in the case that there is no match for + * longer prefixes. + * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len + * Note for any update operation with entry_type modification this + * field must be set. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8); + +enum mlxsw_reg_ralue_action_type { + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME, +}; + +/* reg_ralue_action_type + * Action Type + * Indicates how the IP address is connected. + * It can be connected to a local subnet through local_erif or can be + * on a remote subnet connected through a next-hop router, + * or transmitted to the CPU. + * Reserved when entry_type = MARKER_ENTRY + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2); + +enum mlxsw_reg_ralue_trap_action { + MLXSW_REG_RALUE_TRAP_ACTION_NOP, + MLXSW_REG_RALUE_TRAP_ACTION_TRAP, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR, + MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR, +}; + +/* reg_ralue_trap_action + * Trap action. + * For IP2ME action, only NOP and MIRROR are possible. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4); + +/* reg_ralue_trap_id + * Trap ID to be reported to CPU. + * Trap ID is RTR_INGRESS0 or RTR_INGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9); + +/* reg_ralue_adjacency_index + * Points to the first entry of the group-based ECMP. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24); + +/* reg_ralue_ecmp_size + * Amount of sequential entries starting + * from the adjacency_index (the number of ECMPs). + * The valid range is 1-64, 512, 1024, 2048 and 4096. + * Reserved when trap_action is TRAP or DISCARD_ERROR. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); + +/* reg_ralue_local_erif + * Egress Router Interface. + * Only relevant in case of LOCAL action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); + +/* reg_ralue_v + * Valid bit for the tunnel_ptr field. + * If valid = 0 then trap to CPU as IP2ME trap ID. + * If valid = 1 and the packet format allows NVE or IPinIP tunnel + * decapsulation then tunnel decapsulation is done. + * If valid = 1 and packet format does not allow NVE or IPinIP tunnel + * decapsulation then trap as IP2ME trap ID. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); + +/* reg_ralue_tunnel_ptr + * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. + * For Spectrum, pointer to KVD Linear. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); + +static inline void mlxsw_reg_ralue_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len) +{ + MLXSW_REG_ZERO(ralue, payload); + mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); + mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); + mlxsw_reg_ralue_entry_type_set(payload, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY); + mlxsw_reg_ralue_bmp_len_set(payload, prefix_len); +} + +static inline void mlxsw_reg_ralue_pack4(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + u32 dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip4_set(payload, dip); +} + +static inline void +mlxsw_reg_ralue_act_remote_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size); +} + +static inline void +mlxsw_reg_ralue_act_local_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_local_erif_set(payload, local_erif); +} + +static inline void +mlxsw_reg_ralue_act_ip2me_pack(char *payload) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); +} + +/* RAUHT - Router Algorithmic LPM Unicast Host Table Register + * ---------------------------------------------------------- + * The RAUHT register is used to configure and query the Unicast Host table in + * devices that implement the Algorithmic LPM. + */ +#define MLXSW_REG_RAUHT_ID 0x8014 +#define MLXSW_REG_RAUHT_LEN 0x74 + +static const struct mlxsw_reg_info mlxsw_reg_rauht = { + .id = MLXSW_REG_RAUHT_ID, + .len = MLXSW_REG_RAUHT_LEN, +}; + +enum mlxsw_reg_rauht_type { + MLXSW_REG_RAUHT_TYPE_IPV4, + MLXSW_REG_RAUHT_TYPE_IPV6, +}; + +/* reg_rauht_type + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2); + +enum mlxsw_reg_rauht_op { + MLXSW_REG_RAUHT_OP_QUERY_READ = 0, + /* Read operation */ + MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1, + /* Clear on read operation. Used to read entry and clear + * activity bit. + */ + MLXSW_REG_RAUHT_OP_WRITE_ADD = 0, + /* Add. Used to write a new entry to the table. All R/W fields are + * relevant for new entry. Activity bit is set for new entries. + */ + MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1, + /* Update action. Used to update an existing route entry and + * only update the following fields: + * trap_action, trap_id, mac, counter_set_type, counter_index + */ + MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3, + /* Delete entry */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4, + /* Delete all host entries on a RIF. In this command, dip + * field is reserved. + */ +}; + +/* reg_rauht_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3); + +/* reg_rauht_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. + * To clear the a bit, use "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1); + +/* reg_rauht_rif + * Router Interface + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); + +/* reg_rauht_dip* + * Destination address. + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); + +enum mlxsw_reg_rauht_trap_action { + MLXSW_REG_RAUHT_TRAP_ACTION_NOP, + MLXSW_REG_RAUHT_TRAP_ACTION_TRAP, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR, + MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_rauht_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4); + +enum mlxsw_reg_rauht_trap_id { + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_rauht_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, + * trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); + +/* reg_rauht_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8); + +/* reg_rauht_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24); + +/* reg_rauht_mac + * MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6); + +static inline void mlxsw_reg_rauht_pack(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac) +{ + MLXSW_REG_ZERO(rauht, payload); + mlxsw_reg_rauht_op_set(payload, op); + mlxsw_reg_rauht_rif_set(payload, rif); + mlxsw_reg_rauht_mac_memcpy_to(payload, mac); +} + +static inline void mlxsw_reg_rauht_pack4(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, u32 dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_dip4_set(payload, dip); +} + +/* RALEU - Router Algorithmic LPM ECMP Update Register + * --------------------------------------------------- + * The register enables updating the ECMP section in the action for multiple + * LPM Unicast entries in a single operation. The update is executed to + * all entries of a {virtual router, protocol} tuple using the same ECMP group. + */ +#define MLXSW_REG_RALEU_ID 0x8015 +#define MLXSW_REG_RALEU_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_raleu = { + .id = MLXSW_REG_RALEU_ID, + .len = MLXSW_REG_RALEU_LEN, +}; + +/* reg_raleu_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4); + +/* reg_raleu_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16); + +/* reg_raleu_adjacency_index + * Adjacency Index used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24); + +/* reg_raleu_ecmp_size + * ECMP Size used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13); + +/* reg_raleu_new_adjacency_index + * New Adjacency Index. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24); + +/* reg_raleu_new_ecmp_size + * New ECMP Size. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13); + +static inline void mlxsw_reg_raleu_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + u16 virtual_router, + u32 adjacency_index, u16 ecmp_size, + u32 new_adjacency_index, + u16 new_ecmp_size) +{ + MLXSW_REG_ZERO(raleu, payload); + mlxsw_reg_raleu_protocol_set(payload, protocol); + mlxsw_reg_raleu_virtual_router_set(payload, virtual_router); + mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index); + mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size); +} + +/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register + * ---------------------------------------------------------------- + * The RAUHTD register allows dumping entries from the Router Unicast Host + * Table. For a given session an entry is dumped no more than one time. The + * first RAUHTD access after reset is a new session. A session ends when the + * num_rec response is smaller than num_rec request or for IPv4 when the + * num_entries is smaller than 4. The clear activity affect the current session + * or the last session if a new session has not started. + */ +#define MLXSW_REG_RAUHTD_ID 0x8018 +#define MLXSW_REG_RAUHTD_BASE_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32 +#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \ + MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) +#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 + +static const struct mlxsw_reg_info mlxsw_reg_rauhtd = { + .id = MLXSW_REG_RAUHTD_ID, + .len = MLXSW_REG_RAUHTD_LEN, +}; + +#define MLXSW_REG_RAUHTD_FILTER_A BIT(0) +#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) + +/* reg_rauhtd_filter_fields + * if a bit is '0' then the relevant field is ignored and dump is done + * regardless of the field value + * Bit0 - filter by activity: entry_a + * Bit3 - filter by entry rip: entry_rif + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8); + +enum mlxsw_reg_rauhtd_op { + MLXSW_REG_RAUHTD_OP_DUMP, + MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR, +}; + +/* reg_rauhtd_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2); + +/* reg_rauhtd_num_rec + * At request: number of records requested + * At response: number of records dumped + * For IPv4, each record has 4 entries at request and up to 4 entries + * at response + * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8); + +/* reg_rauhtd_entry_a + * Dump only if activity has value of entry_a + * Reserved if filter_fields bit0 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1); + +enum mlxsw_reg_rauhtd_type { + MLXSW_REG_RAUHTD_TYPE_IPV4, + MLXSW_REG_RAUHTD_TYPE_IPV6, +}; + +/* reg_rauhtd_type + * Dump only if record type is: + * 0 - IPv4 + * 1 - IPv6 + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4); + +/* reg_rauhtd_entry_rif + * Dump only if RIF has value of entry_rif + * Reserved if filter_fields bit3 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16); + +static inline void mlxsw_reg_rauhtd_pack(char *payload, + enum mlxsw_reg_rauhtd_type type) +{ + MLXSW_REG_ZERO(rauhtd, payload); + mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A); + mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR); + mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM); + mlxsw_reg_rauhtd_entry_a_set(payload, 1); + mlxsw_reg_rauhtd_type_set(payload, type); +} + +/* reg_rauhtd_ipv4_rec_num_entries + * Number of valid entries in this record: + * 0 - 1 valid entry + * 1 - 2 valid entries + * 2 - 3 valid entries + * 3 - 4 valid entries + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries, + MLXSW_REG_RAUHTD_BASE_LEN, 28, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +/* reg_rauhtd_rec_type + * Record type. + * 0 - IPv4 + * 1 - IPv6 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8 + +/* reg_rauhtd_ipv4_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_dip + * Destination IPv4 address. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); + +static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, + int ent_index, u16 *p_rif, + u32 *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index); + *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -3420,6 +4633,123 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MPAT - Monitoring Port Analyzer Table + * ------------------------------------- + * MPAT Register is used to query and configure the Switch PortAnalyzer Table. + * For an enabled analyzer, all fields except e (enable) cannot be modified. + */ +#define MLXSW_REG_MPAT_ID 0x901A +#define MLXSW_REG_MPAT_LEN 0x78 + +static const struct mlxsw_reg_info mlxsw_reg_mpat = { + .id = MLXSW_REG_MPAT_ID, + .len = MLXSW_REG_MPAT_LEN, +}; + +/* reg_mpat_pa_id + * Port Analyzer ID. + * Access: Index + */ +MLXSW_ITEM32(reg, mpat, pa_id, 0x00, 28, 4); + +/* reg_mpat_system_port + * A unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, system_port, 0x00, 0, 16); + +/* reg_mpat_e + * Enable. Indicating the Port Analyzer is enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, e, 0x04, 31, 1); + +/* reg_mpat_qos + * Quality Of Service Mode. + * 0: CONFIGURED - QoS parameters (Switch Priority, and encapsulation + * PCP, DEI, DSCP or VL) are configured. + * 1: MAINTAIN - QoS parameters (Switch Priority, Color) are the + * same as in the original packet that has triggered the mirroring. For + * SPAN also the pcp,dei are maintained. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1); + +/* reg_mpat_be + * Best effort mode. Indicates mirroring traffic should not cause packet + * drop or back pressure, but will discard the mirrored packets. Mirrored + * packets will be forwarded on a best effort manner. + * 0: Do not discard mirrored packets + * 1: Discard mirrored packets if causing congestion + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1); + +static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, + u16 system_port, bool e) +{ + MLXSW_REG_ZERO(mpat, payload); + mlxsw_reg_mpat_pa_id_set(payload, pa_id); + mlxsw_reg_mpat_system_port_set(payload, system_port); + mlxsw_reg_mpat_e_set(payload, e); + mlxsw_reg_mpat_qos_set(payload, 1); + mlxsw_reg_mpat_be_set(payload, 1); +} + +/* MPAR - Monitoring Port Analyzer Register + * ---------------------------------------- + * MPAR register is used to query and configure the port analyzer port mirroring + * properties. + */ +#define MLXSW_REG_MPAR_ID 0x901B +#define MLXSW_REG_MPAR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mpar = { + .id = MLXSW_REG_MPAR_ID, + .len = MLXSW_REG_MPAR_LEN, +}; + +/* reg_mpar_local_port + * The local port to mirror the packets from. + * Access: Index + */ +MLXSW_ITEM32(reg, mpar, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mpar_i_e { + MLXSW_REG_MPAR_TYPE_EGRESS, + MLXSW_REG_MPAR_TYPE_INGRESS, +}; + +/* reg_mpar_i_e + * Ingress/Egress + * Access: Index + */ +MLXSW_ITEM32(reg, mpar, i_e, 0x00, 0, 4); + +/* reg_mpar_enable + * Enable mirroring + * By default, port mirroring is disabled for all ports. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1); + +/* reg_mpar_pa_id + * Port Analyzer ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4); + +static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port, + enum mlxsw_reg_mpar_i_e i_e, + bool enable, u8 pa_id) +{ + MLXSW_REG_ZERO(mpar, payload); + mlxsw_reg_mpar_local_port_set(payload, local_port); + mlxsw_reg_mpar_enable_set(payload, enable); + mlxsw_reg_mpar_i_e_set(payload, i_e); + mlxsw_reg_mpar_pa_id_set(payload, pa_id); +} + /* MLCR - Management LED Control Register * -------------------------------------- * Controls the system LEDs. @@ -3849,6 +5179,45 @@ static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index); } +/* SBIB - Shared Buffer Internal Buffer Register + * --------------------------------------------- + * The SBIB register configures per port buffers for internal use. The internal + * buffers consume memory on the port buffers (note that the port buffers are + * used also by PBMC). + * + * For Spectrum this is used for egress mirroring. + */ +#define MLXSW_REG_SBIB_ID 0xB006 +#define MLXSW_REG_SBIB_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_sbib = { + .id = MLXSW_REG_SBIB_ID, + .len = MLXSW_REG_SBIB_LEN, +}; + +/* reg_sbib_local_port + * Local port number + * Not supported for CPU port and router port + * Access: Index + */ +MLXSW_ITEM32(reg, sbib, local_port, 0x00, 16, 8); + +/* reg_sbib_buff_size + * Units represented in cells + * Allowed range is 0 to (cap_max_headroom_size - 1) + * Default is 0 + * Access: RW + */ +MLXSW_ITEM32(reg, sbib, buff_size, 0x08, 0, 24); + +static inline void mlxsw_reg_sbib_pack(char *payload, u8 local_port, + u32 buff_size) +{ + MLXSW_REG_ZERO(sbib, payload); + mlxsw_reg_sbib_local_port_set(payload, local_port); + mlxsw_reg_sbib_buff_size_set(payload, buff_size); +} + static inline const char *mlxsw_reg_id_str(u16 reg_id) { switch (reg_id) { @@ -3924,6 +5293,26 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_RGCR_ID: + return "RGCR"; + case MLXSW_REG_RITR_ID: + return "RITR"; + case MLXSW_REG_RATR_ID: + return "RATR"; + case MLXSW_REG_RALTA_ID: + return "RALTA"; + case MLXSW_REG_RALST_ID: + return "RALST"; + case MLXSW_REG_RALTB_ID: + return "RALTB"; + case MLXSW_REG_RALUE_ID: + return "RALUE"; + case MLXSW_REG_RAUHT_ID: + return "RAUHT"; + case MLXSW_REG_RALEU_ID: + return "RALEU"; + case MLXSW_REG_RAUHTD_ID: + return "RAUHTD"; case MLXSW_REG_MFCR_ID: return "MFCR"; case MLXSW_REG_MFSC_ID: @@ -3932,6 +5321,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "MFSM"; case MLXSW_REG_MTCAP_ID: return "MTCAP"; + case MLXSW_REG_MPAT_ID: + return "MPAT"; + case MLXSW_REG_MPAR_ID: + return "MPAR"; case MLXSW_REG_MTMP_ID: return "MTMP"; case MLXSW_REG_MLCR_ID: @@ -3946,6 +5339,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SBMM"; case MLXSW_REG_SBSR_ID: return "SBSR"; + case MLXSW_REG_SBIB_ID: + return "SBIB"; default: return "*UNKNOWN*"; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 660429ebfbe1..c3e61500819d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -49,9 +49,13 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/list.h> +#include <linux/notifier.h> #include <linux/dcbnl.h> +#include <linux/inetdevice.h> #include <net/switchdev.h> #include <generated/utsrelease.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_mirred.h> #include "spectrum.h" #include "core.h" @@ -131,6 +135,8 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); +static bool mlxsw_sp_port_dev_check(const struct net_device *dev); + static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -159,6 +165,303 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + int i; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_span_valid) + return -EIO; + + mlxsw_sp->span.entries_count = resources->max_span; + mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, + sizeof(struct mlxsw_sp_span_entry), + GFP_KERNEL); + if (!mlxsw_sp->span.entries) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) + INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); + + return 0; +} + +static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); + } + kfree(mlxsw_sp->span.entries); +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + u8 local_port = port->local_port; + int index; + int i; + int err; + + /* find a free entry to use */ + index = -1; + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (!mlxsw_sp->span.entries[i].used) { + index = i; + span_entry = &mlxsw_sp->span.entries[i]; + break; + } + } + if (index < 0) + return NULL; + + /* create a new port analayzer entry for local_port */ + mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + if (err) + return NULL; + + span_entry->used = true; + span_entry->id = index; + span_entry->ref_count = 0; + span_entry->local_port = local_port; + return span_entry; +} + +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + u8 local_port = span_entry->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + span_entry->used = false; +} + +struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + if (curr->used && curr->local_port == port->local_port) + return curr; + } + return NULL; +} + +struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(port); + if (span_entry) { + span_entry->ref_count++; + return span_entry; + } + + return mlxsw_sp_span_entry_create(port); +} + +static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + if (--span_entry->ref_count == 0) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; +} + +static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_inspected_port *p; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + list_for_each_entry(p, &curr->bound_ports_list, list) + if (p->local_port == port->local_port && + p->type == MLXSW_SP_SPAN_EGRESS) + return true; + } + + return false; +} + +static int mlxsw_sp_span_mtu_to_buffsize(int mtu) +{ + return MLXSW_SP_BYTES_TO_CELLS(mtu * 5 / 2) + 1; +} + +static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the mtu value + */ + if (mlxsw_sp_span_is_egress_mirror(port)) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, + mlxsw_sp_span_mtu_to_buffsize(mtu)); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); + return err; + } + } + + return 0; +} + +static struct mlxsw_sp_span_inspected_port * +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry) +{ + struct mlxsw_sp_span_inspected_port *p; + + list_for_each_entry(p, &span_entry->bound_ports_list, list) + if (port->local_port == p->local_port) + return p; + return NULL; +} + +static int +mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int pa_id = span_entry->id; + int err; + + /* if it is an egress SPAN, bind a shared buffer to it */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, + mlxsw_sp_span_mtu_to_buffsize(port->dev->mtu)); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + return err; + } + } + + /* bind the port to the SPAN entry */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); + if (err) + goto err_mpar_reg_write; + + inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); + if (!inspected_port) { + err = -ENOMEM; + goto err_inspected_port_alloc; + } + inspected_port->local_port = port->local_port; + inspected_port->type = type; + list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); + + return 0; + +err_mpar_reg_write: +err_inspected_port_alloc: + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + return err; +} + +static void +mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int pa_id = span_entry->id; + + inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + if (!inspected_port) + return; + + /* remove the inspected port */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); + + /* remove the SBIB buffer if it was egress SPAN */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + + list_del(&inspected_port->list); + kfree(inspected_port); +} + +static int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + int err; + + span_entry = mlxsw_sp_span_entry_get(to); + if (!span_entry) + return -ENOENT; + + netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", + span_entry->id); + + err = mlxsw_sp_span_inspected_port_bind(from, span_entry, type); + if (err) + goto err_port_bind; + + return 0; + +err_port_bind: + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + return err; +} + +static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(to); + if (!span_entry) { + netdev_err(from->dev, "no span entry found\n"); + return; + } + + netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", + span_entry->id); + mlxsw_sp_span_inspected_port_unbind(from, span_entry, type); +} + static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_up) { @@ -171,23 +474,6 @@ static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); } -static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port, - bool *p_is_up) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char paos_pl[MLXSW_REG_PAOS_LEN]; - u8 oper_status; - int err; - - mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); - if (err) - return err; - oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); - *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false; - return 0; -} - static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned char *addr) { @@ -209,23 +495,6 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); } -static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, enum mlxsw_reg_spms_state state) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char *spms_pl; - int err; - - spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); - if (!spms_pl) - return -ENOMEM; - mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); - mlxsw_reg_spms_vid_pack(spms_pl, vid, state); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); - kfree(spms_pl); - return err; -} - static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -525,6 +794,9 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); if (err) return err; + err = mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, mtu); + if (err) + goto err_span_port_mtu_update; err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); if (err) goto err_port_mtu_set; @@ -532,6 +804,8 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) return 0; err_port_mtu_set: + mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, dev->mtu); +err_span_port_mtu_update: mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); return err; } @@ -636,94 +910,8 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static struct mlxsw_sp_vfid * -mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid) -{ - struct mlxsw_sp_vfid *vfid; - - list_for_each_entry(vfid, &mlxsw_sp->port_vfids.list, list) { - if (vfid->vid == vid) - return vfid; - } - - return NULL; -} - -static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) -{ - return find_first_zero_bit(mlxsw_sp->port_vfids.mapped, - MLXSW_SP_VFID_PORT_MAX); -} - -static int __mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static void __mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, fid, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static struct mlxsw_sp_vfid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, - u16 vid) -{ - struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_vfid *vfid; - u16 n_vfid; - int err; - - n_vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); - if (n_vfid == MLXSW_SP_VFID_PORT_MAX) { - dev_err(dev, "No available vFIDs\n"); - return ERR_PTR(-ERANGE); - } - - err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); - if (err) { - dev_err(dev, "Failed to create vFID=%d\n", n_vfid); - return ERR_PTR(err); - } - - vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); - if (!vfid) - goto err_allocate_vfid; - - vfid->vfid = n_vfid; - vfid->vid = vid; - - list_add(&vfid->list, &mlxsw_sp->port_vfids.list); - set_bit(n_vfid, mlxsw_sp->port_vfids.mapped); - - return vfid; - -err_allocate_vfid: - __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); - return ERR_PTR(-ENOMEM); -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vfid *vfid) -{ - clear_bit(vfid->vfid, mlxsw_sp->port_vfids.mapped); - list_del(&vfid->list); - - __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); - - kfree(vfid); -} - static struct mlxsw_sp_port * -mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_vfid *vfid) +mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_vport; @@ -741,8 +929,7 @@ mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING; mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged; mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id; - mlxsw_sp_vport->vport.vfid = vfid; - mlxsw_sp_vport->vport.vid = vfid->vid; + mlxsw_sp_vport->vport.vid = vid; list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list); @@ -759,9 +946,8 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_vfid *vfid; + bool untagged = vid == 1; int err; /* VLAN 0 is added to HW filter when device goes up, but it is @@ -775,31 +961,10 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, return 0; } - vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); - if (!vfid) { - vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (IS_ERR(vfid)) { - netdev_err(dev, "Failed to create vFID for VID=%d\n", - vid); - return PTR_ERR(vfid); - } - } - - mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vfid); + mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); if (!mlxsw_sp_vport) { netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); - err = -ENOMEM; - goto err_port_vport_create; - } - - if (!vfid->nr_vports) { - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, - true, false); - if (err) { - netdev_err(dev, "Failed to setup flooding for vFID=%d\n", - vfid->vfid); - goto err_vport_flood_set; - } + return -ENOMEM; } /* When adding the first VLAN interface on a bridged port we need to @@ -814,70 +979,37 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, } } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n", - vid, vfid->vfid); - goto err_port_vid_to_fid_set; - } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); if (err) { netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); goto err_port_vid_learning_set; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); goto err_port_add_vid; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - goto err_port_stp_state_set; - } - - vfid->nr_vports++; - return 0; -err_port_stp_state_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); err_port_add_vid: mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); err_port_vid_learning_set: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - mlxsw_sp_vfid_to_fid(vfid->vfid), vid); -err_port_vid_to_fid_set: if (list_is_singular(&mlxsw_sp_port->vports_list)) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); err_port_vp_mode_trans: - if (!vfid->nr_vports) - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, - false); -err_vport_flood_set: mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); -err_port_vport_create: - if (!vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); return err; } -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid) +static int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_vfid *vfid; + struct mlxsw_sp_fid *f; int err; /* VLAN 0 is removed from HW filter when device goes down, but @@ -892,15 +1024,6 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; } - vfid = mlxsw_sp_vport->vport.vfid; - - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_DISCARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - return err; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", @@ -914,16 +1037,12 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, return err; } - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n", - vid, vfid->vfid); - return err; - } + /* Drop FID reference. If this was the last reference the + * resources will be freed. + */ + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f && !WARN_ON(!f->leave)) + f->leave(mlxsw_sp_vport); /* When removing the last VLAN interface on a bridged port we need to * transition all active 802.1Q bridge VLANs to use VID to FID @@ -937,13 +1056,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, } } - vfid->nr_vports--; mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); - /* Destroy the vFID if no vPorts are assigned to it anymore. */ - if (!vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp_port->mlxsw_sp, vfid); - return 0; } @@ -968,16 +1082,162 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, return 0; } +static struct mlxsw_sp_port_mall_tc_entry * +mlxsw_sp_port_mirror_entry_find(struct mlxsw_sp_port *port, + unsigned long cookie) { + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + + list_for_each_entry(mall_tc_entry, &port->mall_tc_list, list) + if (mall_tc_entry->cookie == cookie) + return mall_tc_entry; + + return NULL; +} + +static int +mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *cls, + const struct tc_action *a, + bool ingress) +{ + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + struct net *net = dev_net(mlxsw_sp_port->dev); + enum mlxsw_sp_span_type span_type; + struct mlxsw_sp_port *to_port; + struct net_device *to_dev; + int ifindex; + int err; + + ifindex = tcf_mirred_ifindex(a); + to_dev = __dev_get_by_index(net, ifindex); + if (!to_dev) { + netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n"); + return -EINVAL; + } + + if (!mlxsw_sp_port_dev_check(to_dev)) { + netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port"); + return -ENOTSUPP; + } + to_port = netdev_priv(to_dev); + + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR; + mall_tc_entry->mirror.to_local_port = to_port->local_port; + mall_tc_entry->mirror.ingress = ingress; + list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list); + + span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + err = mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type); + if (err) + goto err_mirror_add; + return 0; + +err_mirror_add: + list_del(&mall_tc_entry->list); + kfree(mall_tc_entry); + return err; +} + +static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + __be16 protocol, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + const struct tc_action *a; + int err; + + if (!tc_single_action(cls->exts)) { + netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); + return -ENOTSUPP; + } + + tc_for_each_action(a, cls->exts) { + if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) + return -ENOTSUPP; + + err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls, + a, ingress); + if (err) + return err; + } + + return 0; +} + +static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *cls) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + enum mlxsw_sp_span_type span_type; + struct mlxsw_sp_port *to_port; + + mall_tc_entry = mlxsw_sp_port_mirror_entry_find(mlxsw_sp_port, + cls->cookie); + if (!mall_tc_entry) { + netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); + return; + } + + switch (mall_tc_entry->type) { + case MLXSW_SP_PORT_MALL_MIRROR: + to_port = mlxsw_sp->ports[mall_tc_entry->mirror.to_local_port]; + span_type = mall_tc_entry->mirror.ingress ? + MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + break; + default: + WARN_ON(1); + } + + list_del(&mall_tc_entry->list); + kfree(mall_tc_entry); +} + +static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + + if (tc->type == TC_SETUP_MATCHALL) { + switch (tc->cls_mall->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, + proto, + tc->cls_mall, + ingress); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, + tc->cls_mall); + return 0; + default: + return -EINVAL; + } + } + + return -ENOTSUPP; +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_setup_tc = mlxsw_sp_setup_tc, .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, + .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, @@ -1072,7 +1332,7 @@ struct mlxsw_sp_port_hw_stats { u64 (*getter)(char *payload); }; -static const struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { { .str = "a_frames_transmitted_ok", .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get, @@ -1153,6 +1413,90 @@ static const struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { #define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { + { + .str = "rx_octets_prio", + .getter = mlxsw_reg_ppcnt_rx_octets_get, + }, + { + .str = "rx_frames_prio", + .getter = mlxsw_reg_ppcnt_rx_frames_get, + }, + { + .str = "tx_octets_prio", + .getter = mlxsw_reg_ppcnt_tx_octets_get, + }, + { + .str = "tx_frames_prio", + .getter = mlxsw_reg_ppcnt_tx_frames_get, + }, + { + .str = "rx_pause_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_get, + }, + { + .str = "rx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_duration_get, + }, + { + .str = "tx_pause_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_get, + }, + { + .str = "tx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_duration_get, + }, +}; + +#define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) + +static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(char *ppcnt_pl) +{ + u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + + return MLXSW_SP_CELLS_TO_BYTES(transmit_queue); +} + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { + { + .str = "tc_transmit_queue_tc", + .getter = mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get, + }, + { + .str = "tc_no_buffer_discard_uc_tc", + .getter = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get, + }, +}; + +#define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) + +#define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ + (MLXSW_SP_PORT_HW_PRIO_STATS_LEN + \ + MLXSW_SP_PORT_HW_TC_STATS_LEN) * \ + IEEE_8021QAZ_MAX_TCS) + +static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + mlxsw_sp_port_hw_prio_stats[i].str, prio); + *p += ETH_GSTRING_LEN; + } +} + +static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + mlxsw_sp_port_hw_tc_stats[i].str, tc); + *p += ETH_GSTRING_LEN; + } +} + static void mlxsw_sp_port_get_strings(struct net_device *dev, u32 stringset, u8 *data) { @@ -1166,6 +1510,13 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_get_prio_strings(&p, i); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_get_tc_strings(&p, i); + break; } } @@ -1193,27 +1544,80 @@ static int mlxsw_sp_port_set_phys_id(struct net_device *dev, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); } -static void mlxsw_sp_port_get_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) +static int +mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, + int *p_len, enum mlxsw_reg_ppcnt_grp grp) +{ + switch (grp) { + case MLXSW_REG_PPCNT_IEEE_8023_CNT: + *p_hw_stats = mlxsw_sp_port_hw_stats; + *p_len = MLXSW_SP_PORT_HW_STATS_LEN; + break; + case MLXSW_REG_PPCNT_PRIO_CNT: + *p_hw_stats = mlxsw_sp_port_hw_prio_stats; + *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + break; + case MLXSW_REG_PPCNT_TC_CNT: + *p_hw_stats = mlxsw_sp_port_hw_tc_stats; + *p_len = MLXSW_SP_PORT_HW_TC_STATS_LEN; + break; + default: + WARN_ON(1); + return -ENOTSUPP; + } + return 0; +} + +static void __mlxsw_sp_port_get_stats(struct net_device *dev, + enum mlxsw_reg_ppcnt_grp grp, int prio, + u64 *data, int data_index) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; - int i; + int i, len; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, - MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); + err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); + if (err) + return; + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); - for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) - data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0; + for (i = 0; i < len; i++) + data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; +} + +static void mlxsw_sp_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i, data_index = 0; + + /* IEEE 802.3 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, 0, + data, data_index); + data_index = MLXSW_SP_PORT_HW_STATS_LEN; + + /* Per-Priority Counters */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + } + + /* Per-TC Counters */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_TC_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN; + } } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return MLXSW_SP_PORT_HW_STATS_LEN; + return MLXSW_SP_PORT_ETHTOOL_STATS_LEN; default: return -EOPNOTSUPP; } @@ -1434,7 +1838,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | - SUPPORTED_Pause | SUPPORTED_Asym_Pause; + SUPPORTED_Pause | SUPPORTED_Asym_Pause | + SUPPORTED_Autoneg; cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -1493,7 +1898,6 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, u32 eth_proto_new; u32 eth_proto_cap; u32 eth_proto_admin; - bool is_up; int err; speed = ethtool_cmd_speed(cmd); @@ -1525,12 +1929,7 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, return err; } - err = mlxsw_sp_port_oper_status_get(mlxsw_sp_port, &is_up); - if (err) { - netdev_err(dev, "Failed to get oper status"); - return err; - } - if (!is_up) + if (!netif_running(dev)) return 0; err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); @@ -1708,6 +2107,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_untagged_vlans_alloc; } INIT_LIST_HEAD(&mlxsw_sp_port->vports_list); + INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list); mlxsw_sp_port->pcpu_stats = netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); @@ -1729,7 +2129,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_FILTER; + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; + dev->hw_features |= NETIF_F_HW_TC; /* Each packet needs to have a Tx header (metadata) on top all other * headers. @@ -1838,23 +2239,6 @@ err_port_active_vlans_alloc: return err; } -static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - struct mlxsw_sp_port *mlxsw_sp_vport, *tmp; - - list_for_each_entry_safe(mlxsw_sp_vport, tmp, - &mlxsw_sp_port->vports_list, vport.list) { - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - /* vPorts created for VLAN devices should already be gone - * by now, since we unregistered the port netdev. - */ - WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev)); - mlxsw_sp_port_kill_vid(dev, 0, vid); - } -} - static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -1865,13 +2249,14 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_vports_fini(mlxsw_sp_port); + mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); + WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); free_netdev(mlxsw_sp_port->dev); } @@ -2108,11 +2493,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, local_port = mlxsw_reg_pude_local_port_get(pude_pl); mlxsw_sp_port = mlxsw_sp->ports[local_port]; - if (!mlxsw_sp_port) { - dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received for non-existent port\n", - local_port); + if (!mlxsw_sp_port) return; - } status = mlxsw_reg_pude_oper_status_get(pude_pl); if (status == MLXSW_PORT_OPER_STATUS_UP) { @@ -2267,6 +2649,31 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPBC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPUC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IP2ME, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4, + }, }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) @@ -2307,7 +2714,7 @@ err_rx_trap_set: mlxsw_sp); err_rx_listener_register: for (i--; i >= 0; i--) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2324,7 +2731,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2403,8 +2810,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; - INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list); - INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->fids); + INIT_LIST_HEAD(&mlxsw_sp->vfids.list); INIT_LIST_HEAD(&mlxsw_sp->br_mids.list); err = mlxsw_sp_base_mac_get(mlxsw_sp); @@ -2413,16 +2820,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return err; } - err = mlxsw_sp_ports_create(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); - return err; - } - err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n"); - goto err_event_register; + return err; } err = mlxsw_sp_traps_init(mlxsw_sp); @@ -2455,8 +2856,32 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_router_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); + goto err_router_init; + } + + err = mlxsw_sp_span_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); + goto err_span_init; + } + + err = mlxsw_sp_ports_create(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + return 0; +err_ports_create: + mlxsw_sp_span_fini(mlxsw_sp); +err_span_init: + mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); @@ -2465,20 +2890,25 @@ err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); err_rx_listener_register: mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); -err_event_register: - mlxsw_sp_ports_remove(mlxsw_sp); return err; } static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + int i; + mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_span_fini(mlxsw_sp); + mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); - mlxsw_sp_ports_remove(mlxsw_sp); + WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); + WARN_ON(!list_empty(&mlxsw_sp->fids)); + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -2509,12 +2939,17 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, + .used_kvd_sizes = 1, + .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, + .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, + .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, .type = MLXSW_PORT_SWID_TYPE_ETH, } }, + .resource_query_enable = 1, }; static struct mlxsw_driver mlxsw_sp_driver = { @@ -2540,16 +2975,590 @@ static struct mlxsw_driver mlxsw_sp_driver = { .profile = &mlxsw_sp_config_profile, }; -static int -mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port) +static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + if (mlxsw_sp_port) + dev_hold(mlxsw_sp_port->dev); + rcu_read_unlock(); + return mlxsw_sp_port; +} + +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) +{ + dev_put(mlxsw_sp_port->dev); +} + +static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, + unsigned long event) +{ + switch (event) { + case NETDEV_UP: + if (!r) + return true; + r->ref_count++; + return false; + case NETDEV_DOWN: + if (r && --r->ref_count == 0) + return true; + /* It is possible we already removed the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. + */ + return false; + } + + return false; +} + +static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (!mlxsw_sp->rifs[i]) + return i; + + return MLXSW_SP_RIF_MAX; +} + +static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, + bool *p_lagged, u16 *p_system_port) +{ + u8 local_port = mlxsw_sp_vport->local_port; + + *p_lagged = mlxsw_sp_vport->lagged; + *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; +} + +static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev, u16 rif, + bool create) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + bool lagged = mlxsw_sp_vport->lagged; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 system_port; + + mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif, + l3_dev->mtu, l3_dev->dev_addr); + + mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, + mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static struct mlxsw_sp_fid * +mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->leave = mlxsw_sp_vport_rif_sp_leave; + f->ref_count = 0; + f->dev = l3_dev; + f->fid = fid; + + return f; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return NULL; + + ether_addr_copy(r->addr, l3_dev->dev_addr); + r->mtu = l3_dev->mtu; + r->ref_count = 1; + r->dev = l3_dev; + r->rif = rif; + r->f = f; + + return r; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_fid *f; + struct mlxsw_sp_rif *r; + u16 fid, rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return ERR_PTR(-ERANGE); + + err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); + if (err) + return ERR_PTR(err); + + fid = mlxsw_sp_rif_sp_to_fid(rif); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + if (err) + goto err_rif_fdb_op; + + f = mlxsw_sp_rfid_alloc(fid, l3_dev); + if (!f) { + err = -ENOMEM; + goto err_rfid_alloc; + } + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + return r; + +err_rif_alloc: + kfree(f); +err_rfid_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +err_rif_fdb_op: + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); + return ERR_PTR(err); +} + +static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_rif *r) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 fid = f->fid; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + kfree(f); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); + + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); +} + +static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_rif *r; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!r) { + r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + if (IS_ERR(r)) + return PTR_ERR(r); + } + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f); + r->f->ref_count++; + + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid); + + return 0; +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r); +} + +static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + case NETDEV_DOWN: + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned long event) +{ + if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, + event, vid); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); +} + +static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + u16 fid; + + if (is_vlan_dev(l3_dev)) + fid = vlan_dev_vlan_id(l3_dev); + else if (mlxsw_sp->master_bridge.dev == l3_dev) + fid = 1; + else + return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); + + return mlxsw_sp_fid_find(mlxsw_sp, fid); +} + +static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return MLXSW_REG_RITR_FID_IF; + else + return MLXSW_REG_RITR_VLAN_IF; +} + +static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + u16 fid, u16 rif, + bool create) +{ + enum mlxsw_reg_ritr_if_type rif_type; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_type = mlxsw_sp_rif_type_get(fid); + mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu, + l3_dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + u16 rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return -ERANGE; + + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + if (err) + return err; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); + if (err) + goto err_rif_fdb_op; + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + netdev_dbg(l3_dev, "RIF=%d created\n", rif); + + return 0; + +err_rif_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); +err_rif_fdb_op: + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + return err; +} + +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); + + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, + struct net_device *br_dev, + unsigned long event) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + struct mlxsw_sp_fid *f; + + /* FID can either be an actual FID if the L3 device is the + * VLAN-aware bridge or a VLAN device on top. Otherwise, the + * L3 device is a VLAN-unaware bridge and we get a vFID. + */ + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + case NETDEV_DOWN: + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, + unsigned long event) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_bridge_master(real_dev) && + mlxsw_sp->master_bridge.dev == real_dev) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, + event); + + return 0; +} + +static int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(r, event)) + goto out; + + if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_inetaddr_port_event(dev, event); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_inetaddr_lag_event(dev, event); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_inetaddr_vlan_event(dev, event); + +out: + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif, + const char *mac, int mtu) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!r) + return 0; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false); + if (err) + return err; + + err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true); + if (err) + goto err_rif_fdb_op; + + ether_addr_copy(r->addr, dev->dev_addr); + r->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", r->rif); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu); +err_rif_edit: + mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true); + return err; +} + +static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, + u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return mlxsw_sp_port_vport_find_by_fid(lag_port, fid); + else + return test_bit(fid, lag_port->active_vlans); +} + +static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; + u8 local_port = mlxsw_sp_port->local_port; + u16 lag_id = mlxsw_sp_port->lag_id; + int i, count = 0; - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT); - mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port); + if (!mlxsw_sp_port->lagged) + return true; - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + struct mlxsw_sp_port *lag_port; + + lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (!lag_port || lag_port->local_port == local_port) + continue; + if (mlxsw_sp_lag_port_fid_member(lag_port, fid)) + count++; + } + + return !count; } static int @@ -2564,17 +3573,8 @@ mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, mlxsw_sp_port->local_port); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); -} - -static int -mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; - - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG); - mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using Port=%d, FID=%d\n", + mlxsw_sp_port->local_port, fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } @@ -2590,71 +3590,64 @@ mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using LAG ID=%d, FID=%d\n", + mlxsw_sp_port->lag_id, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } -static int -__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port) +int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { - int err, last_err = 0; - u16 vid; - - for (vid = 1; vid < VLAN_N_VID - 1; vid++) { - err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid); - if (err) - last_err = err; - } + if (!mlxsw_sp_port_fdb_should_flush(mlxsw_sp_port, fid)) + return 0; - return last_err; + if (mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, + fid); + else + return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid); } -static int -__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port) +static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp) { - int err, last_err = 0; - u16 vid; - - for (vid = 1; vid < VLAN_N_VID - 1; vid++) { - err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid); - if (err) - last_err = err; - } + struct mlxsw_sp_fid *f, *tmp; - return last_err; + list_for_each_entry_safe(f, tmp, &mlxsw_sp->fids, list) + if (--f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); + else + WARN_ON_ONCE(1); } -static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port) +static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { - if (!list_empty(&mlxsw_sp_port->vports_list)) - if (mlxsw_sp_port->lagged) - return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port); - else - return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port); - else - if (mlxsw_sp_port->lagged) - return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port); - else - return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port); + return !mlxsw_sp->master_bridge.dev || + mlxsw_sp->master_bridge.dev == br_dev; } -static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport) +static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport); - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - - if (mlxsw_sp_vport->lagged) - return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport, - fid); - else - return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid); + mlxsw_sp->master_bridge.dev = br_dev; + mlxsw_sp->master_bridge.ref_count++; } -static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp) { - return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; + if (--mlxsw_sp->master_bridge.ref_count == 0) { + mlxsw_sp->master_bridge.dev = NULL; + /* It's possible upper VLAN devices are still holding + * references to underlying FIDs. Drop the reference + * and release the resources if it was the last one. + * If it wasn't, then something bad happened. + */ + mlxsw_sp_master_bridge_gone_sync(mlxsw_sp); + } } -static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *br_dev) { struct net_device *dev = mlxsw_sp_port->dev; int err; @@ -2668,6 +3661,8 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) if (err) return err; + mlxsw_sp_master_bridge_inc(mlxsw_sp_port->mlxsw_sp, br_dev); + mlxsw_sp_port->learning = 1; mlxsw_sp_port->learning_sync = 1; mlxsw_sp_port->uc_flood = 1; @@ -2676,16 +3671,14 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, - bool flush_fdb) +static void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; - if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port)) - netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + mlxsw_sp_master_bridge_dec(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_port->learning = 0; mlxsw_sp_port->learning_sync = 0; mlxsw_sp_port->uc_flood = 0; @@ -2694,28 +3687,7 @@ static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, /* Add implicit VLAN interface in the device, so that untagged * packets will be classified to the default vFID. */ - return mlxsw_sp_port_add_vid(dev, 0, 1); -} - -static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - return !mlxsw_sp->master_bridge.dev || - mlxsw_sp->master_bridge.dev == br_dev; -} - -static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - mlxsw_sp->master_bridge.dev = br_dev; - mlxsw_sp->master_bridge.ref_count++; -} - -static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - if (--mlxsw_sp->master_bridge.ref_count == 0) - mlxsw_sp->master_bridge.dev = NULL; + mlxsw_sp_port_add_vid(dev, 0, 1); } static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) @@ -2831,6 +3803,45 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, return -EBUSY; } +static void +mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + /* If vPort is assigned a RIF, then leave it since it's no + * longer valid. + */ + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lag_id = lag_id; + mlxsw_sp_vport->lagged = 1; +} + +static void +mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lagged = 0; +} + static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { @@ -2866,6 +3877,9 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lag_id = lag_id; mlxsw_sp_port->lagged = 1; lag->ref_count++; + + mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id); + return 0; err_col_port_enable: @@ -2876,65 +3890,35 @@ err_col_port_add: return err; } -static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev, - bool flush_fdb); - -static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *lag_dev) +static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_upper *lag; u16 lag_id = mlxsw_sp_port->lag_id; - int err; + struct mlxsw_sp_upper *lag; if (!mlxsw_sp_port->lagged) - return 0; + return; lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); - if (err) - return err; - err = mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); - if (err) - return err; - - /* In case we leave a LAG device that has bridges built on top, - * then their teardown sequence is never issued and we need to - * invoke the necessary cleanup routines ourselves. - */ - list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, - vport.list) { - struct net_device *br_dev; - - if (!mlxsw_sp_vport->bridged) - continue; - - br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false); - } + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); if (mlxsw_sp_port->bridged) { mlxsw_sp_port_active_vlans_del(mlxsw_sp_port); - mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false); - mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port); } - if (lag->ref_count == 1) { - if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port)) - netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); - err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); - if (err) - return err; - } + if (lag->ref_count == 1) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, mlxsw_sp_port->local_port); mlxsw_sp_port->lagged = 0; lag->ref_count--; - return 0; + + mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port); } static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2983,42 +3967,25 @@ static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid = vlan_dev_vlan_id(vlan_dev); mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); + if (WARN_ON(!mlxsw_sp_vport)) return -EINVAL; - } mlxsw_sp_vport->dev = vlan_dev; return 0; } -static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *vlan_dev) +static void mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) { struct mlxsw_sp_port *mlxsw_sp_vport; u16 vid = vlan_dev_vlan_id(vlan_dev); mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); - return -EINVAL; - } - - /* When removing a VLAN device while still bridged we should first - * remove it from the bridge, as we receive the bridge's notification - * when the vPort is already gone. - */ - if (mlxsw_sp_vport->bridged) { - struct net_device *br_dev; - - br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true); - } + if (WARN_ON(!mlxsw_sp_vport)) + return; mlxsw_sp_vport->dev = mlxsw_sp_port->dev; - - return 0; } static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, @@ -3028,7 +3995,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; - int err; + int err = 0; mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -3037,73 +4004,56 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master || !info->linking) + if (!is_vlan_dev(upper_dev) && + !netif_is_lag_master(upper_dev) && + !netif_is_bridge_master(upper_dev)) + return -EINVAL; + if (!info->linking) break; /* HW limitation forbids to put ports to multiple bridges. */ if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) - return NOTIFY_BAD; + return -EINVAL; if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, info->upper_info)) - return NOTIFY_BAD; + return -EINVAL; + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + return -EINVAL; + if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (is_vlan_dev(upper_dev)) { - if (info->linking) { + if (info->linking) err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, upper_dev); - if (err) { - netdev_err(dev, "Failed to link VLAN device\n"); - return NOTIFY_BAD; - } - } else { - err = mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, - upper_dev); - if (err) { - netdev_err(dev, "Failed to unlink VLAN device\n"); - return NOTIFY_BAD; - } - } + else + mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); } else if (netif_is_bridge_master(upper_dev)) { - if (info->linking) { - err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to join bridge\n"); - return NOTIFY_BAD; - } - mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev); - } else { - err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port, - true); - mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); - if (err) { - netdev_err(dev, "Failed to leave bridge\n"); - return NOTIFY_BAD; - } - } + if (info->linking) + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + upper_dev); + else + mlxsw_sp_port_bridge_leave(mlxsw_sp_port); } else if (netif_is_lag_master(upper_dev)) { - if (info->linking) { + if (info->linking) err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); - if (err) { - netdev_err(dev, "Failed to join link aggregation\n"); - return NOTIFY_BAD; - } - } else { - err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, - upper_dev); - if (err) { - netdev_err(dev, "Failed to leave link aggregation\n"); - return NOTIFY_BAD; - } - } + else + mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + } else { + err = -EINVAL; + WARN_ON(1); } break; } - return NOTIFY_DONE; + return err; } static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, @@ -3127,7 +4077,7 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, break; } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_port_event(struct net_device *dev, @@ -3141,7 +4091,7 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *dev, return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, @@ -3154,218 +4104,230 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, netdev_for_each_lower_dev(lag_dev, dev, iter) { if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); - if (ret == NOTIFY_BAD) + if (ret) return ret; } } - return NOTIFY_DONE; + return 0; } -static struct mlxsw_sp_vfid * -mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev) +static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) { - struct mlxsw_sp_vfid *vfid; + u16 fid = vlan_dev_vlan_id(vlan_dev); + struct mlxsw_sp_fid *f; - list_for_each_entry(vfid, &mlxsw_sp->br_vfids.list, list) { - if (vfid->br_dev == br_dev) - return vfid; + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (!f) { + f = mlxsw_sp_fid_create(mlxsw_sp, fid); + if (IS_ERR(f)) + return PTR_ERR(f); } - return NULL; + f->ref_count++; + + return 0; } -static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid) +static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) { - return vfid - MLXSW_SP_VFID_PORT_MAX; + u16 fid = vlan_dev_vlan_id(vlan_dev); + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (f && f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f && --f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); } -static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid) +static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, + unsigned long event, void *ptr) { - return MLXSW_SP_VFID_PORT_MAX + br_vfid; + struct netdev_notifier_changeupper_info *info; + struct net_device *upper_dev; + struct mlxsw_sp *mlxsw_sp; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return 0; + if (br_dev != mlxsw_sp->master_bridge.dev) + return 0; + + info = ptr; + + switch (event) { + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (!is_vlan_dev(upper_dev)) + break; + if (info->linking) { + err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, + upper_dev); + if (err) + return err; + } else { + mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + } + break; + } + + return 0; } -static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp) +static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) { - return find_first_zero_bit(mlxsw_sp->br_vfids.mapped, - MLXSW_SP_VFID_BR_MAX); + return find_first_zero_bit(mlxsw_sp->vfids.mapped, + MLXSW_SP_VFID_MAX); } -static struct mlxsw_sp_vfid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) +static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_vfid *vfid; - u16 n_vfid; + struct mlxsw_sp_fid *f; + u16 vfid, fid; int err; - n_vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp)); - if (n_vfid == MLXSW_SP_VFID_MAX) { + vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); + if (vfid == MLXSW_SP_VFID_MAX) { dev_err(dev, "No available vFIDs\n"); return ERR_PTR(-ERANGE); } - err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + fid = mlxsw_sp_vfid_to_fid(vfid); + err = mlxsw_sp_vfid_op(mlxsw_sp, fid, true); if (err) { - dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + dev_err(dev, "Failed to create FID=%d\n", fid); return ERR_PTR(err); } - vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); - if (!vfid) + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) goto err_allocate_vfid; - vfid->vfid = n_vfid; - vfid->br_dev = br_dev; + f->leave = mlxsw_sp_vport_vfid_leave; + f->fid = fid; + f->dev = br_dev; - list_add(&vfid->list, &mlxsw_sp->br_vfids.list); - set_bit(mlxsw_sp_vfid_to_br_vfid(n_vfid), mlxsw_sp->br_vfids.mapped); + list_add(&f->list, &mlxsw_sp->vfids.list); + set_bit(vfid, mlxsw_sp->vfids.mapped); - return vfid; + return f; err_allocate_vfid: - __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + mlxsw_sp_vfid_op(mlxsw_sp, fid, false); return ERR_PTR(-ENOMEM); } -static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vfid *vfid) +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *f) { - u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid->vfid); + u16 vfid = mlxsw_sp_fid_to_vfid(f->fid); + u16 fid = f->fid; - clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped); - list_del(&vfid->list); + clear_bit(vfid, mlxsw_sp->vfids.mapped); + list_del(&f->list); - __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); - kfree(vfid); + kfree(f); + + mlxsw_sp_vfid_op(mlxsw_sp, fid, false); } -static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev, - bool flush_fdb) +static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool valid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - struct net_device *dev = mlxsw_sp_vport->dev; - struct mlxsw_sp_vfid *vfid, *new_vfid; - int err; - vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); - if (!vfid) { - WARN_ON(!vfid); - return -EINVAL; - } - - /* We need a vFID to go back to after leaving the bridge's vFID. */ - new_vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); - if (!new_vfid) { - new_vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (IS_ERR(new_vfid)) { - netdev_err(dev, "Failed to create vFID for VID=%d\n", - vid); - return PTR_ERR(new_vfid); - } - } - - /* Invalidate existing {Port, VID} to vFID mapping and create a new - * one for the new vFID. - */ - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", - vfid->vfid); - goto err_port_vid_to_fid_invalidate; - } + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid, + vid); +} - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(new_vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", - new_vfid->vfid); - goto err_port_vid_to_fid_validate; - } +static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp_fid *f; + int err; - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning\n"); - goto err_port_vid_learning_set; + f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev); + if (!f) { + f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev); + if (IS_ERR(f)) + return PTR_ERR(f); } - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, - false); - if (err) { - netdev_err(dev, "Failed clear to clear flooding\n"); + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, true); + if (err) goto err_vport_flood_set; - } - - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); - if (err) { - netdev_err(dev, "Failed to set STP state\n"); - goto err_port_stp_state_set; - } - if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport)) - netdev_err(dev, "Failed to flush FDB\n"); + err = mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, true); + if (err) + goto err_vport_fid_map; - /* Switch between the vFIDs and destroy the old one if needed. */ - new_vfid->nr_vports++; - mlxsw_sp_vport->vport.vfid = new_vfid; - vfid->nr_vports--; - if (!vfid->nr_vports) - mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, f); + f->ref_count++; - mlxsw_sp_vport->learning = 0; - mlxsw_sp_vport->learning_sync = 0; - mlxsw_sp_vport->uc_flood = 0; - mlxsw_sp_vport->bridged = 0; + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", f->fid); return 0; -err_port_stp_state_set: +err_vport_fid_map: + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); err_vport_flood_set: -err_port_vid_learning_set: -err_port_vid_to_fid_validate: -err_port_vid_to_fid_invalidate: - /* Rollback vFID only if new. */ - if (!new_vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, new_vfid); + if (!f->ref_count) + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); return err; } +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, false); + + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); + + mlxsw_sp_port_fdb_flush(mlxsw_sp_vport, f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); +} + static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, struct net_device *br_dev) { - struct mlxsw_sp_vfid *old_vfid = mlxsw_sp_vport->vport.vfid; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); struct net_device *dev = mlxsw_sp_vport->dev; - struct mlxsw_sp_vfid *vfid; int err; - vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); - if (!vfid) { - vfid = mlxsw_sp_br_vfid_create(mlxsw_sp, br_dev); - if (IS_ERR(vfid)) { - netdev_err(dev, "Failed to create bridge vFID\n"); - return PTR_ERR(vfid); - } - } + if (f && !WARN_ON(!f->leave)) + f->leave(mlxsw_sp_vport); - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, true, false); + err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport, br_dev); if (err) { - netdev_err(dev, "Failed to setup flooding for vFID=%d\n", - vfid->vfid); - goto err_port_flood_set; + netdev_err(dev, "Failed to join vFID\n"); + return err; } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); @@ -3374,38 +4336,6 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, goto err_port_vid_learning_set; } - /* We need to invalidate existing {Port, VID} to vFID mapping and - * create a new one for the bridge's vFID. - */ - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(old_vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", - old_vfid->vfid); - goto err_port_vid_to_fid_invalidate; - } - - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", - vfid->vfid); - goto err_port_vid_to_fid_validate; - } - - /* Switch between the vFIDs and destroy the old one if needed. */ - vfid->nr_vports++; - mlxsw_sp_vport->vport.vfid = vfid; - old_vfid->nr_vports--; - if (!old_vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, old_vfid); - mlxsw_sp_vport->learning = 1; mlxsw_sp_vport->learning_sync = 1; mlxsw_sp_vport->uc_flood = 1; @@ -3413,20 +4343,25 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, return 0; -err_port_vid_to_fid_validate: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - mlxsw_sp_vfid_to_fid(old_vfid->vfid), vid); -err_port_vid_to_fid_invalidate: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); err_port_vid_learning_set: - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, false); -err_port_flood_set: - if (!vfid->nr_vports) - mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); return err; } +static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); + + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); + + mlxsw_sp_vport->learning = 0; + mlxsw_sp_vport->learning_sync = 0; + mlxsw_sp_vport->uc_flood = 0; + mlxsw_sp_vport->bridged = 0; +} + static bool mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, const struct net_device *br_dev) @@ -3435,7 +4370,9 @@ mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, vport.list) { - if (mlxsw_sp_vport_br_get(mlxsw_sp_vport) == br_dev) + struct net_device *dev = mlxsw_sp_vport_dev_get(mlxsw_sp_vport); + + if (dev && dev == br_dev) return false; } @@ -3450,56 +4387,39 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, struct netdev_notifier_changeupper_info *info = ptr; struct mlxsw_sp_port *mlxsw_sp_vport; struct net_device *upper_dev; - int err; + int err = 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master || !info->linking) - break; if (!netif_is_bridge_master(upper_dev)) - return NOTIFY_BAD; + return -EINVAL; + if (!info->linking) + break; /* We can't have multiple VLAN interfaces configured on * the same port and being members in the same bridge. */ if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, upper_dev)) - return NOTIFY_BAD; + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master) - break; if (info->linking) { - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); - return NOTIFY_BAD; - } + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, upper_dev); - if (err) { - netdev_err(dev, "Failed to join bridge\n"); - return NOTIFY_BAD; - } } else { - /* We ignore bridge's unlinking notifications if vPort - * is gone, since we already left the bridge when the - * VLAN device was unlinked from the real device. - */ if (!mlxsw_sp_vport) - return NOTIFY_DONE; - err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, - upper_dev, true); - if (err) { - netdev_err(dev, "Failed to leave bridge\n"); - return NOTIFY_BAD; - } + return 0; + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); } } - return NOTIFY_DONE; + return err; } static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, @@ -3514,12 +4434,12 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr, vid); - if (ret == NOTIFY_BAD) + if (ret) return ret; } } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, @@ -3535,35 +4455,44 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, vid); - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + int err = 0; - if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_netdevice_port_event(dev, event, ptr); - - if (netif_is_lag_master(dev)) - return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + err = mlxsw_sp_netdevice_router_port_event(dev); + else if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_netdevice_port_event(dev, event, ptr); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); - if (is_vlan_dev(dev)) - return mlxsw_sp_netdevice_vlan_event(dev, event, ptr); - - return NOTIFY_DONE; + return notifier_from_errno(err); } static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; +static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_event, + .priority = 10, /* Must be called before FIB notifier block */ +}; + static int __init mlxsw_sp_module_init(void) { int err; register_netdevice_notifier(&mlxsw_sp_netdevice_nb); + register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) goto err_core_driver_register; @@ -3577,6 +4506,7 @@ err_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { mlxsw_core_driver_unregister(&mlxsw_sp_driver); + unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 13b30eaa13d4..f69aa37d1521 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -39,19 +39,22 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> #include <linux/dcbnl.h> +#include <linux/in6.h> #include <net/switchdev.h> #include "port.h" #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID -#define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ -#define MLXSW_SP_VFID_BR_MAX 6144 /* Bridged VLAN interfaces */ -#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) +#define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ + +#define MLXSW_SP_RFID_BASE 15360 +#define MLXSW_SP_RIF_MAX 800 #define MLXSW_SP_LAG_MAX 64 #define MLXSW_SP_PORT_PER_LAG_MAX 16 @@ -60,6 +63,12 @@ #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 +#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ +#define MLXSW_SP_LPM_TREE_MAX 22 +#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) + +#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 + #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -67,6 +76,10 @@ #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ +#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ + /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. */ @@ -87,12 +100,22 @@ struct mlxsw_sp_upper { unsigned int ref_count; }; -struct mlxsw_sp_vfid { +struct mlxsw_sp_fid { + void (*leave)(struct mlxsw_sp_port *mlxsw_sp_vport); struct list_head list; - u16 nr_vports; - u16 vfid; /* Starting at 0 */ - struct net_device *br_dev; - u16 vid; + unsigned int ref_count; + struct net_device *dev; + struct mlxsw_sp_rif *r; + u16 fid; +}; + +struct mlxsw_sp_rif { + struct net_device *dev; + unsigned int ref_count; + struct mlxsw_sp_fid *f; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif; }; struct mlxsw_sp_mid { @@ -115,7 +138,17 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) static inline bool mlxsw_sp_fid_is_vfid(u16 fid) { - return fid >= MLXSW_SP_VFID_BASE; + return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE; +} + +static inline bool mlxsw_sp_fid_is_rfid(u16 fid) +{ + return fid >= MLXSW_SP_RFID_BASE; +} + +static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif) +{ + return MLXSW_SP_RFID_BASE + rif; } struct mlxsw_sp_sb_pr { @@ -152,20 +185,97 @@ struct mlxsw_sp_sb { } ports[MLXSW_PORT_MAX_PORTS]; }; -struct mlxsw_sp { +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) + +struct mlxsw_sp_prefix_usage { + DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); +}; + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +struct mlxsw_sp_lpm_tree { + u8 id; /* tree ID */ + unsigned int ref_count; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +struct mlxsw_sp_fib; + +struct mlxsw_sp_vr { + u16 id; /* virtual router ID */ + bool used; + enum mlxsw_sp_l3proto proto; + u32 tb_id; /* kernel fib table id */ + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_fib *fib; +}; + +enum mlxsw_sp_span_type { + MLXSW_SP_SPAN_EGRESS, + MLXSW_SP_SPAN_INGRESS +}; + +struct mlxsw_sp_span_inspected_port { + struct list_head list; + enum mlxsw_sp_span_type type; + u8 local_port; +}; + +struct mlxsw_sp_span_entry { + u8 local_port; + bool used; + struct list_head bound_ports_list; + int ref_count; + int id; +}; + +enum mlxsw_sp_port_mall_action_type { + MLXSW_SP_PORT_MALL_MIRROR, +}; + +struct mlxsw_sp_port_mall_mirror_tc_entry { + u8 to_local_port; + bool ingress; +}; + +struct mlxsw_sp_port_mall_tc_entry { + struct list_head list; + unsigned long cookie; + enum mlxsw_sp_port_mall_action_type type; + union { + struct mlxsw_sp_port_mall_mirror_tc_entry mirror; + }; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; + struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct rhashtable neigh_ht; struct { - struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_PORT_MAX)]; - } port_vfids; + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_group_list; + struct list_head nexthop_neighs_list; +}; + +struct mlxsw_sp { struct { struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_BR_MAX)]; - } br_vfids; + DECLARE_BITMAP(mapped, MLXSW_SP_VFID_MAX); + } vfids; struct { struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_MID_MAX)]; + DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; - unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; + struct list_head fids; /* VLAN-aware bridge FIDs */ + struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -183,6 +293,15 @@ struct mlxsw_sp { struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; + struct mlxsw_sp_router router; + struct { + DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); + } kvdl; + + struct { + struct mlxsw_sp_span_entry *entries; + int entries_count; + } span; }; static inline struct mlxsw_sp_upper * @@ -217,7 +336,7 @@ struct mlxsw_sp_port { u16 lag_id; struct { struct list_head list; - struct mlxsw_sp_vfid *vfid; + struct mlxsw_sp_fid *f; u16 vid; } vport; struct { @@ -239,8 +358,13 @@ struct mlxsw_sp_port { unsigned long *untagged_vlans; /* VLAN interfaces */ struct list_head vports_list; + /* TC handles */ + struct list_head mall_tc_list; }; +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -259,28 +383,38 @@ mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; } +static inline u16 +mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vid; +} + static inline bool mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port) { - return mlxsw_sp_port->vport.vfid; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + + return vid != 0; } -static inline struct net_device * -mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline void mlxsw_sp_vport_fid_set(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_fid *f) { - return mlxsw_sp_vport->vport.vfid->br_dev; + mlxsw_sp_vport->vport.f = f; } -static inline u16 -mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline struct mlxsw_sp_fid * +mlxsw_sp_vport_fid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) { - return mlxsw_sp_vport->vport.vid; + return mlxsw_sp_vport->vport.f; } -static inline u16 -mlxsw_sp_vport_vfid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline struct net_device * +mlxsw_sp_vport_dev_get(const struct mlxsw_sp_port *mlxsw_sp_vport) { - return mlxsw_sp_vport->vport.vfid->vfid; + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + return f ? f->dev : NULL; } static inline struct mlxsw_sp_port * @@ -298,20 +432,60 @@ mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) } static inline struct mlxsw_sp_port * -mlxsw_sp_port_vport_find_by_vfid(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 vfid) +mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) { struct mlxsw_sp_port *mlxsw_sp_vport; list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, vport.list) { - if (mlxsw_sp_vport_vfid_get(mlxsw_sp_vport) == vfid) + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + if (f && f->fid == fid) return mlxsw_sp_vport; } return NULL; } +static inline struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->fids, list) + if (f->fid == fid) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_fid * +mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->vfids.list, list) + if (f->dev == br_dev) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) + return mlxsw_sp->rifs[i]; + + return NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, @@ -364,12 +538,17 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid); -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, - bool set, bool only_uc); +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid); +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding); +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -399,4 +578,19 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) #endif +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans); +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4); +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n); +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n); + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index a3720a0fad7d..074cdda7b6f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -194,7 +194,7 @@ static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0); + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), pptb_pl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 0b323661c0b6..01cfb7512827 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -103,7 +103,8 @@ static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]); + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), pptb_pl); } @@ -249,6 +250,7 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, return err; memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets)); + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; return 0; } @@ -351,7 +353,8 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err; - if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) { + if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) && + pfc->pfc_en) { netdev_err(dev, "PAUSE frames already enabled on port\n"); return -EINVAL; } @@ -371,6 +374,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, } memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc)); + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c new file mode 100644 index 000000000000..ac321e8e5c1a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -0,0 +1,91 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP_KVDL_SINGLE_BASE 0 +#define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_CHUNKS_BASE \ + (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_CHUNK_MAX 32 + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +{ + int entry_index; + int size; + int type_base; + int type_size; + int type_entries; + + if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { + return -EINVAL; + } else if (entry_count == 1) { + type_base = MLXSW_SP_KVDL_SINGLE_BASE; + type_size = MLXSW_SP_KVDL_SINGLE_SIZE; + type_entries = 1; + } else { + type_base = MLXSW_SP_KVDL_CHUNKS_BASE; + type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; + type_entries = MLXSW_SP_CHUNK_MAX; + } + + entry_index = type_base; + size = type_base + type_size; + for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { + int i; + + for (i = 0; i < type_entries; i++) + set_bit(entry_index + i, mlxsw_sp->kvdl.usage); + return entry_index; + } + return -ENOBUFS; +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +{ + int type_entries; + int i; + + if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) + type_entries = 1; + else + type_entries = MLXSW_SP_CHUNK_MAX; + for (i = 0; i < type_entries; i++) + clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c new file mode 100644 index 000000000000..81418d629231 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -0,0 +1,1814 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/rhashtable.h> +#include <linux/bitops.h> +#include <linux/in6.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/neighbour.h> +#include <net/arp.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" + +#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ + for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) + +static bool +mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + unsigned char prefix; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { + if (!test_bit(prefix, prefix_usage2->b)) + return false; + } + return true; +} + +static bool +mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static bool +mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } }; + + return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none); +} + +static void +mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static void +mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + memset(prefix_usage, 0, sizeof(*prefix_usage)); +} + +static void +mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + set_bit(prefix_len, prefix_usage->b); +} + +static void +mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + clear_bit(prefix_len, prefix_usage->b); +} + +struct mlxsw_sp_fib_key { + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char prefix_len; +}; + +enum mlxsw_sp_fib_entry_type { + MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, + MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, + MLXSW_SP_FIB_ENTRY_TYPE_TRAP, +}; + +struct mlxsw_sp_nexthop_group; + +struct mlxsw_sp_fib_entry { + struct rhash_head ht_node; + struct mlxsw_sp_fib_key key; + enum mlxsw_sp_fib_entry_type type; + u8 added:1; + u16 rif; /* used for action local */ + struct mlxsw_sp_vr *vr; + struct list_head nexthop_group_node; + struct mlxsw_sp_nexthop_group *nh_group; +}; + +struct mlxsw_sp_fib { + struct rhashtable ht; + unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +static const struct rhashtable_params mlxsw_sp_fib_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_fib_entry, key), + .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_fib_key), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + int err; + + err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); + if (err) + return err; + if (fib->prefix_ref_count[prefix_len]++ == 0) + mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); + return 0; +} + +static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + + if (--fib->prefix_ref_count[prefix_len] == 0) + mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); + if (!fib_entry) + return NULL; + memcpy(fib_entry->key.addr, addr, addr_len); + fib_entry->key.prefix_len = prefix_len; + return fib_entry; +} + +static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry) +{ + kfree(fib_entry); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) +{ + struct mlxsw_sp_fib *fib; + int err; + + fib = kzalloc(sizeof(*fib), GFP_KERNEL); + if (!fib) + return ERR_PTR(-ENOMEM); + err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); + if (err) + goto err_rhashtable_init; + return fib; + +err_rhashtable_init: + kfree(fib); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) +{ + rhashtable_destroy(&fib->ht); + kfree(fib); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) +{ + static struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->ref_count == 0) { + if (one_reserved) + one_reserved = false; + else + return lpm_tree; + } + } + return NULL; +} + +static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int +mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralst_pl[MLXSW_REG_RALST_LEN]; + u8 root_bin = 0; + u8 prefix; + u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) + root_bin = prefix; + + mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { + if (prefix == 0) + continue; + mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, + MLXSW_REG_RALST_BIN_NO_CHILD); + last_prefix = prefix; + } + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved); + if (!lpm_tree) + return ERR_PTR(-EBUSY); + lpm_tree->proto = proto; + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); + if (err) + return ERR_PTR(err); + + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, + lpm_tree); + if (err) + goto err_left_struct_set; + return lpm_tree; + +err_left_struct_set: + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + return ERR_PTR(err); +} + +static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->proto == proto && + mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, + prefix_usage)) + goto inc_ref_count; + } + lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, + proto, one_reserved); + if (IS_ERR(lpm_tree)) + return lpm_tree; + +inc_ref_count: + lpm_tree->ref_count++; + return lpm_tree; +} + +static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + if (--lpm_tree->ref_count == 0) + return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); + return 0; +} + +static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; + } +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + return vr; + } + return NULL; +} + +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + /* Bind to tree 0 which is default */ + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static u32 mlxsw_sp_fix_tb_id(u32 tb_id) +{ + /* For our purpose, squash main and local table into one */ + if (tb_id == RT_TABLE_LOCAL) + tb_id = RT_TABLE_MAIN; + return tb_id; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int i; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (vr->used && vr->proto == proto && vr->tb_id == tb_id) + return vr; + } + return NULL; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_find_unused(mlxsw_sp); + if (!vr) + return ERR_PTR(-EBUSY); + vr->fib = mlxsw_sp_fib_create(); + if (IS_ERR(vr->fib)) + return ERR_CAST(vr->fib); + + vr->proto = proto; + vr->tb_id = tb_id; + mlxsw_sp_prefix_usage_zero(&req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + proto, true); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_tree_get; + } + vr->lpm_tree = lpm_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + + vr->used = true; + return vr; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); +err_tree_get: + mlxsw_sp_fib_destroy(vr->fib); + + return ERR_PTR(err); +} + +static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + mlxsw_sp_fib_destroy(vr->fib); + vr->used = false; +} + +static int +mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + vr->proto, false); + if (IS_ERR(lpm_tree)) { + /* We failed to get a tree according to the required + * prefix usage. However, the current tree might be still good + * for us if our requirement is subset of the prefixes used + * in the tree. + */ + if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + return PTR_ERR(lpm_tree); + } + + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + vr->lpm_tree = lpm_tree; + return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int err; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto); + if (!vr) { + vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto); + if (IS_ERR(vr)) + return vr; + } else { + struct mlxsw_sp_prefix_usage req_prefix_usage; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &vr->fib->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + /* Need to replace LPM tree in case new prefix is required. */ + err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &req_prefix_usage); + if (err) + return ERR_PTR(err); + } + return vr; +} + +static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +{ + /* Destroy virtual router entity in case the associated FIB is empty + * and allow it to be used for other tables in future. Otherwise, + * check if some prefix usage did not disappear and change tree if + * that is the case. Note that in case new, smaller tree cannot be + * allocated, the original one will be kept being used. + */ + if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage)) + mlxsw_sp_vr_destroy(mlxsw_sp, vr); + else + mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &vr->fib->prefix_usage); +} + +static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + vr->id = i; + } +} + +struct mlxsw_sp_neigh_key { + unsigned char addr[sizeof(struct in6_addr)]; + struct net_device *dev; +}; + +struct mlxsw_sp_neigh_entry { + struct rhash_head ht_node; + struct mlxsw_sp_neigh_key key; + u16 rif; + struct neighbour *n; + bool offloaded; + struct delayed_work dw; + struct mlxsw_sp_port *mlxsw_sp_port; + unsigned char ha[ETH_ALEN]; + struct list_head nexthop_list; /* list of nexthops using + * this neigh entry + */ + struct list_head nexthop_neighs_list_node; +}; + +static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key), + .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_neigh_key), +}; + +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, + struct net_device *dev, u16 rif, + struct neighbour *n) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + if (!neigh_entry) + return NULL; + memcpy(neigh_entry->key.addr, addr, addr_len); + neigh_entry->key.dev = dev; + neigh_entry->rif = rif; + neigh_entry->n = n; + INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); + INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; +} + +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + kfree(neigh_entry); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, + size_t addr_len, struct net_device *dev) +{ + struct mlxsw_sp_neigh_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.dev = dev; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); +} + +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_rif *r; + u32 dip; + int err; + + if (n->tbl != &arp_tbl) + return 0; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (neigh_entry) { + WARN_ON(neigh_entry->n != n); + return 0; + } + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (WARN_ON(!r)) + return -EINVAL; + + neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, + r->rif, n); + if (!neigh_entry) + return -ENOMEM; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + return 0; + +err_neigh_entry_insert: + mlxsw_sp_neigh_entry_destroy(neigh_entry); + return err; +} + +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 dip; + + if (n->tbl != &arp_tbl) + return; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (!neigh_entry) + return; + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_destroy(neigh_entry); +} + +static void +mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + + mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval); +} + +static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int ent_index) +{ + struct net_device *dev; + struct neighbour *n; + __be32 dipn; + u32 dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + + if (!mlxsw_sp->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dipn = htonl(dip); + dev = mlxsw_sp->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + +static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + u8 num_entries; + int i; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + rec_index); + /* Hardware starts counting at 0, so add 1. */ + num_entries++; + + /* Each record consists of several neighbour entries. */ + for (i = 0; i < num_entries; i++) { + int ent_index; + + ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i; + mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl, + ent_index); + } + +} + +static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, int rec_index) +{ + switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) { + case MLXSW_REG_RAUHTD_TYPE_IPV4: + mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + case MLXSW_REG_RAUHTD_TYPE_IPV6: + WARN_ON_ONCE(1); + break; + } +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + char *rauhtd_pl; + u8 num_rec; + int i, err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + /* Make sure the neighbour's netdev isn't removed in the + * process. + */ + rtnl_lock(); + do { + mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), + rauhtd_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + break; + } + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); + } while (num_rec); + rtnl_unlock(); + + kfree(rauhtd_pl); + return err; +} + +static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + /* If this neigh have nexthops, make the kernel think this neigh + * is active regardless of the traffic. + */ + if (!list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); +} + +static void +mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = mlxsw_sp->router.neighs_update.interval; + + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) +{ + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.neighs_update.dw.work); + int err; + + err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + + mlxsw_sp_router_neighs_update_nh(mlxsw_sp); + + mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp); +} + +static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.nexthop_probe_dw.work); + + /* Iterate over nexthop neighbours, find those who are unresolved and + * send arp on them. This solves the chicken-egg problem when + * the nexthop wouldn't get offloaded until the neighbor is resolved + * but it wouldn't get resolved ever in case traffic is flowing in HW + * using different nexthop. + * + * Take RTNL mutex here to prevent lists from changes. + */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + if (!(neigh_entry->n->nud_state & NUD_VALID) && + !list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); + + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, + MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing); + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = + container_of(work, struct mlxsw_sp_neigh_entry, dw.work); + struct neighbour *n = neigh_entry->n; + struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + struct net_device *dev; + bool entry_connected; + u8 nud_state; + bool updating; + bool removing; + bool adding; + u32 dip; + int err; + + read_lock_bh(&n->lock); + dip = ntohl(*((__be32 *) n->primary_key)); + memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + nud_state = n->nud_state; + dev = n->dev; + read_unlock_bh(&n->lock); + + entry_connected = nud_state & NUD_VALID; + adding = (!neigh_entry->offloaded) && entry_connected; + updating = neigh_entry->offloaded && entry_connected; + removing = neigh_entry->offloaded && !entry_connected; + + if (adding || updating) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, + MLXSW_REG(rauht), rauht_pl); + if (err) { + netdev_err(dev, "Could not add neigh %pI4h\n", &dip); + neigh_entry->offloaded = false; + } else { + neigh_entry->offloaded = true; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); + } else if (removing) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), + rauht_pl); + if (err) { + netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); + neigh_entry->offloaded = true; + } else { + neigh_entry->offloaded = false; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + } + + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + unsigned long interval; + struct net_device *dev; + struct neigh_parms *p; + struct neighbour *n; + u32 dip; + + switch (event) { + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We don't care about changes in the default table. */ + if (!p->dev || p->tbl != &arp_tbl) + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use RCU variant to walk the device chain. + */ + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); + mlxsw_sp->router.neighs_update.interval = interval; + + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_NEIGH_UPDATE: + n = ptr; + dev = n->dev; + + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, + &dip, + sizeof(__be32), + dev); + if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + mlxsw_sp_port_dev_put(mlxsw_sp_port); + return NOTIFY_DONE; + } + neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + /* Take a reference to ensure the neighbour won't be + * destructed until we drop the reference in delayed + * work. + */ + neigh_clone(n); + if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); + } + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { + .notifier_call = mlxsw_sp_router_netevent_event, +}; + +static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->router.neigh_ht, + &mlxsw_sp_neigh_ht_params); + if (err) + return err; + + /* Initialize the polling interval according to the default + * table. + */ + mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); + + err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb); + if (err) + goto err_register_netevent_notifier; + + /* Create the delayed works for the activity_update */ + INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, + mlxsw_sp_router_neighs_update_work); + INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, + mlxsw_sp_router_probe_unresolved_nexthops); + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); + return 0; + +err_register_netevent_notifier: + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); + return err; +} + +static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); +} + +struct mlxsw_sp_nexthop { + struct list_head neigh_list_node; /* member of neigh entry list */ + struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group + * this belongs to + */ + u8 should_offload:1, /* set indicates this neigh is connected and + * should be put to KVD linear area of this group. + */ + offloaded:1, /* set in case the neigh is actually put into + * KVD linear area of this group. + */ + update:1; /* set indicates that MAC of this neigh should be + * updated in HW + */ + struct mlxsw_sp_neigh_entry *neigh_entry; +}; + +struct mlxsw_sp_nexthop_group { + struct list_head list; /* node in mlxsw->router.nexthop_group_list */ + struct list_head fib_list; /* list of fib entries that use this group */ + u8 adj_index_valid:1; + u32 adj_index; + u16 ecmp_size; + u16 count; + struct mlxsw_sp_nexthop nexthops[0]; +}; + +static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + u32 adj_index, u16 ecmp_size, + u32 new_adj_index, + u16 new_ecmp_size) +{ + char raleu_pl[MLXSW_REG_RALEU_LEN]; + + mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, + adj_index, ecmp_size, + new_adj_index, new_ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); +} + +static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + u32 old_adj_index, u16 old_ecmp_size) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr = NULL; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (vr == fib_entry->vr) + continue; + vr = fib_entry->vr; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + old_adj_index, + old_ecmp_size, + nh_grp->adj_index, + nh_grp->ecmp_size); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, adj_index, neigh_entry->rif); + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + u32 adj_index = nh_grp->adj_index; /* base */ + struct mlxsw_sp_nexthop *nh; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) { + nh->offloaded = 0; + continue; + } + + if (nh->update) { + err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, + adj_index, nh); + if (err) + return err; + nh->update = 0; + nh->offloaded = 1; + } + adj_index++; + } + return 0; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static int +mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + return err; + } + return 0; +} + +static void +mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + bool offload_change = false; + u32 adj_index; + u16 ecmp_size = 0; + bool old_adj_index_valid; + u32 old_adj_index; + u16 old_ecmp_size; + int ret; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (nh->should_offload ^ nh->offloaded) { + offload_change = true; + if (nh->should_offload) + nh->update = 1; + } + if (nh->should_offload) + ecmp_size++; + } + if (!offload_change) { + /* Nothing was added or removed, so no need to reallocate. Just + * update MAC on existing adjacency indexes. + */ + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + return; + } + if (!ecmp_size) + /* No neigh of this group is connected so we just set + * the trap and let everthing flow through kernel. + */ + goto set_trap; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); + if (ret < 0) { + /* We ran out of KVD linear space, just set the + * trap and let everything flow through kernel. + */ + dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); + goto set_trap; + } + adj_index = ret; + old_adj_index_valid = nh_grp->adj_index_valid; + old_adj_index = nh_grp->adj_index; + old_ecmp_size = nh_grp->ecmp_size; + nh_grp->adj_index_valid = 1; + nh_grp->adj_index = adj_index; + nh_grp->ecmp_size = ecmp_size; + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + + if (!old_adj_index_valid) { + /* The trap was set for fib entries, so we have to call + * fib entry update to unset it and use adjacency index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); + goto set_trap; + } + return; + } + + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, + old_adj_index, old_ecmp_size); + mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); + goto set_trap; + } + return; + +set_trap: + old_adj_index_valid = nh_grp->adj_index_valid; + nh_grp->adj_index_valid = 0; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + nh->offloaded = 0; + } + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + if (old_adj_index_valid) + mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index); +} + +static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, + bool removing) +{ + if (!removing && !nh->should_offload) + nh->should_offload = 1; + else if (removing && nh->offloaded) + nh->should_offload = 0; + nh->update = 1; +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing) +{ + struct mlxsw_sp_nexthop *nh; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + rtnl_unlock(); +} + +static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 gwip = ntohl(fib_nh->nh_gw); + struct net_device *dev = fib_nh->nh_dev; + struct neighbour *n; + u8 nud_state; + + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + __be32 gwipn = htonl(gwip); + + n = neigh_create(&arp_tbl, &gwipn, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; + } + } else { + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The second branch takes the reference in neith_create() + */ + n = neigh_entry->n; + neigh_clone(n); + } + + /* If that is the first nexthop connected to that neigh, add to + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_add_tail(&neigh_entry->nexthop_neighs_list_node, + &mlxsw_sp->router.nexthop_neighs_list); + + nh->nh_grp = nh_grp; + nh->neigh_entry = neigh_entry; + list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + read_unlock_bh(&n->lock); + __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID)); + + return 0; +} + +static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + + list_del(&nh->neigh_list_node); + + /* If that is the last nexthop connected to that neigh, remove from + * nexthop_neighs_list + */ + if (list_empty(&nh->neigh_entry->nexthop_list)) + list_del(&nh->neigh_entry->nexthop_neighs_list_node); + + neigh_release(neigh_entry->n); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + struct fib_nh *fib_nh; + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(*nh_grp) + + fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->count = fi->fib_nhs; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + fib_nh = &fi->fib_nh[i]; + err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop_init; + } + list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_init: + for (i--; i >= 0; i--) + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i; + + list_del(&nh_grp->list); + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + } + kfree(nh_grp); +} + +static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, + struct fib_info *fi) +{ + int i; + + for (i = 0; i < fi->fib_nhs; i++) { + struct fib_nh *fib_nh = &fi->fib_nh[i]; + u32 gwip = ntohl(fib_nh->nh_gw); + + if (memcmp(nh->neigh_entry->key.addr, + &gwip, sizeof(u32)) == 0 && + nh->neigh_entry->key.dev == fib_nh->nh_dev) + return true; + } + return false; +} + +static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, + struct fib_info *fi) +{ + int i; + + if (nh_grp->count != fi->fib_nhs) + return false; + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (!mlxsw_sp_nexthop_match(nh, fi)) + return false; + } + return true; +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, + list) { + if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) + return nh_grp; + } + return NULL; +} + +static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); + fib_entry->nh_group = nh_grp; + return 0; +} + +static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + mlxsw_sp_lpm_init(mlxsw_sp); + mlxsw_sp_vrs_init(mlxsw_sp); + return mlxsw_sp_neigh_init(mlxsw_sp); +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_neigh_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} + +static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + enum mlxsw_reg_ralue_trap_action trap_action; + u16 trap_id = 0; + u32 adjacency_index = 0; + u16 ecmp_size = 0; + + /* In case the nexthop group adjacency index is valid, use it + * with provided ECMP size. Otherwise, setup trap and pass + * traffic to kernel. + */ + if (fib_entry->nh_group->adj_index_valid) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = fib_entry->nh_group->adj_index; + ecmp_size = fib_entry->nh_group->ecmp_size; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_local_pack(ralue_pl, + MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, + fib_entry->rif); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: + return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); + case MLXSW_SP_L3_PROTO_IPV6: + return -EINVAL; + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + enum mlxsw_reg_ralue_op op; + + op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE : + MLXSW_REG_RALUE_OP_WRITE_UPDATE; + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); +} + +static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_DELETE); +} + +struct mlxsw_sp_router_fib4_add_info { + struct switchdev_trans_item tritem; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; +}; + +static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) +{ + const struct mlxsw_sp_router_fib4_add_info *info = data; + struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; + struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr); + kfree(info); +} + +static int +mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct fib_info *fi = fib4->fi; + + if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } + if (fib4->type != RTN_UNICAST) + return -EINVAL; + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + struct mlxsw_sp_rif *r; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); + if (!r) + return -EINVAL; + fib_entry->rif = r->rif; + return 0; + } + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); +} + +static void +mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + return; + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); +} + +static int +mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len); + if (!fib_entry) { + err = -ENOMEM; + goto err_fib_entry_create; + } + fib_entry->vr = vr; + + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + if (err) + goto err_fib4_entry_init; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto err_alloc_info; + } + info->mlxsw_sp = mlxsw_sp; + info->fib_entry = fib_entry; + switchdev_trans_item_enqueue(trans, info, + mlxsw_sp_router_fib4_add_info_destroy, + &info->tritem); + return 0; + +err_alloc_info: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); +err_fib4_entry_init: + mlxsw_sp_fib_entry_destroy(fib_entry); +err_fib_entry_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return err; +} + +static int +mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + int err; + + info = switchdev_trans_item_dequeue(trans); + fib_entry = info->fib_entry; + kfree(info); + + vr = fib_entry->vr; + err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry); + if (err) + goto err_fib_entry_insert; + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_add; + return 0; + +err_fib_entry_add: + mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); +err_fib_entry_insert: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, vr); + return err; +} + +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + if (switchdev_trans_ph_prepare(trans)) + return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, + fib4, trans); + return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, + fib4, trans); +} + +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + if (!vr) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n"); + return -ENOENT; + } + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len); + if (!fib_entry) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); + return -ENOENT; + } + mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, vr); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 3710f19ed6bb..a1ad5e6bdfa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -55,13 +55,10 @@ static u16 mlxsw_sp_port_vid_to_fid_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); u16 fid = vid; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); - - fid = mlxsw_sp_vfid_to_fid(vfid); - } + fid = f ? f->fid : fid; if (!fid) fid = mlxsw_sp_port->pvid; @@ -169,11 +166,6 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } -static bool mlxsw_sp_vfid_is_vport_br(u16 vfid) -{ - return vfid >= MLXSW_SP_VFID_PORT_MAX; -} - static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 idx_begin, u16 idx_end, bool set, bool only_uc) @@ -185,15 +177,10 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, char *sftr_pl; int err; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - if (mlxsw_sp_vfid_is_vport_br(idx_begin)) - local_port = mlxsw_sp_port->local_port; - else - local_port = MLXSW_PORT_CPU_PORT; - } else { + else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - } sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); if (!sftr_pl) @@ -236,7 +223,8 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, int err; if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + u16 fid = mlxsw_sp_vport_fid_get(mlxsw_sp_port)->fid; + u16 vfid = mlxsw_sp_fid_to_vfid(fid); return __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid, set, true); @@ -260,14 +248,17 @@ err_port_flood_set: return err; } -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, - bool set, bool only_uc) +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool set) { + u16 vfid; + /* In case of vFIDs, index into the flooding table is relative to * the start of the vFIDs range. */ + vfid = mlxsw_sp_fid_to_vfid(fid); return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, - only_uc); + false); } static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -383,6 +374,187 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } +static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_fid_map(struct mlxsw_sp *mlxsw_sp, u16 fid, bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_alloc(u16 fid) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->fid = fid; + + return f; +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) +{ + struct mlxsw_sp_fid *f; + int err; + + err = mlxsw_sp_fid_op(mlxsw_sp, fid, true); + if (err) + return ERR_PTR(err); + + /* Although all the ports member in the FID might be using a + * {Port, VID} to FID mapping, we create a global VID-to-FID + * mapping. This allows a port to transition to VLAN mode, + * knowing the global mapping exists. + */ + err = mlxsw_sp_fid_map(mlxsw_sp, fid, true); + if (err) + goto err_fid_map; + + f = mlxsw_sp_fid_alloc(fid); + if (!f) { + err = -ENOMEM; + goto err_allocate_fid; + } + + list_add(&f->list, &mlxsw_sp->fids); + + return f; + +err_allocate_fid: + mlxsw_sp_fid_map(mlxsw_sp, fid, false); +err_fid_map: + mlxsw_sp_fid_op(mlxsw_sp, fid, false); + return ERR_PTR(err); +} + +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) +{ + u16 fid = f->fid; + + list_del(&f->list); + + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + + kfree(f); + + mlxsw_sp_fid_op(mlxsw_sp, fid, false); +} + +static int __mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); + if (!f) { + f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid); + if (IS_ERR(f)) + return PTR_ERR(f); + } + + f->ref_count++; + + netdev_dbg(mlxsw_sp_port->dev, "Joined FID=%d\n", fid); + + return 0; +} + +static void __mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); + if (WARN_ON(!f)) + return; + + netdev_dbg(mlxsw_sp_port->dev, "Left FID=%d\n", fid); + + mlxsw_sp_port_fdb_flush(mlxsw_sp_port, fid); + + if (--f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp_port->mlxsw_sp, f); +} + +static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid, + bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + + /* If port doesn't have vPorts, then it can use the global + * VID-to-FID mapping. + */ + if (list_empty(&mlxsw_sp_port->vports_list)) + return 0; + + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, valid, fid, fid); +} + +static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_begin, u16 fid_end) +{ + int fid, err; + + for (fid = fid_begin; fid <= fid_end; fid++) { + err = __mlxsw_sp_port_fid_join(mlxsw_sp_port, fid); + if (err) + goto err_port_fid_join; + } + + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, + true, false); + if (err) + goto err_port_flood_set; + + for (fid = fid_begin; fid <= fid_end; fid++) { + err = mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, true); + if (err) + goto err_port_fid_map; + } + + return 0; + +err_port_fid_map: + for (fid--; fid >= fid_begin; fid--) + mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); + __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, + false); +err_port_flood_set: + fid = fid_end; +err_port_fid_join: + for (fid--; fid >= fid_begin; fid--) + __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); + return err; +} + +static void mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_begin, u16 fid_end) +{ + int fid; + + for (fid = fid_begin; fid <= fid_end; fid++) + mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); + + __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, + false); + + for (fid = fid_begin; fid <= fid_end; fid++) + __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); +} + static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { @@ -440,74 +612,6 @@ err_port_allow_untagged_set: return err; } -static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - int err; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, fid); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); - - if (err) - return err; - - set_bit(fid, mlxsw_sp->active_fids); - return 0; -} - -static void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, u16 fid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - clear_bit(fid, mlxsw_sp->active_fids); - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, - fid, fid); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) -{ - enum mlxsw_reg_svfa_mt mt; - - if (!list_empty(&mlxsw_sp_port->vports_list)) - mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - else - mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, true, fid, fid); -} - -static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) -{ - enum mlxsw_reg_svfa_mt mt; - - if (list_empty(&mlxsw_sp_port->vports_list)) - return 0; - - mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, fid, fid); -} - -static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_add_vid(dev, 0, vid); - if (err) - goto err_port_add_vid; - } - return 0; - -err_port_add_vid: - for (vid--; vid >= vid_begin; vid--) - mlxsw_sp_port_kill_vid(dev, 0, vid); - return err; -} - static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged) @@ -533,57 +637,17 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, last_visited_vid, old_pvid; - enum mlxsw_reg_svfa_mt mt; + u16 vid, old_pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then packets ingressing through the port with - * the specified VIDs will be directed to CPU. - */ if (!mlxsw_sp_port->bridged) - return mlxsw_sp_port_add_vids(dev, vid_begin, vid_end); - - for (vid = vid_begin; vid <= vid_end; vid++) { - if (!test_bit(vid, mlxsw_sp->active_fids)) { - err = mlxsw_sp_fid_create(mlxsw_sp, vid); - if (err) { - netdev_err(dev, "Failed to create FID=%d\n", - vid); - return err; - } - - /* When creating a FID, we set a VID to FID mapping - * regardless of the port's mode. - */ - mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, - true, vid, vid); - if (err) { - netdev_err(dev, "Failed to create FID=VID=%d mapping\n", - vid); - goto err_port_vid_to_fid_set; - } - } - } - - /* Set FID mapping according to port's mode */ - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); - if (err) { - netdev_err(dev, "Failed to map FID=%d", vid); - last_visited_vid = --vid; - goto err_port_fid_map; - } - } + return -EINVAL; - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, - true, false); + err = mlxsw_sp_port_fid_join(mlxsw_sp_port, vid_begin, vid_end); if (err) { - netdev_err(dev, "Failed to configure flooding\n"); - goto err_port_flood_set; + netdev_err(dev, "Failed to join FIDs\n"); + return err; } err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, @@ -628,10 +692,6 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_port_vid_to_fid_set: - mlxsw_sp_fid_destroy(mlxsw_sp, vid); - return err; - err_port_stp_state_set: for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -641,13 +701,7 @@ err_port_pvid_set: __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, false); err_port_vlans_set: - __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); -err_port_flood_set: - last_visited_vid = vid_end; -err_port_fid_map: - for (vid = last_visited_vid; vid >= vid_begin; vid--) - mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); return err; } @@ -678,9 +732,10 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } -static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const char *mac, u16 fid, bool adding, - bool dynamic) +static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + enum mlxsw_reg_sfd_rec_action action, + bool dynamic) { char *sfd_pl; int err; @@ -691,14 +746,29 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, - local_port); + mac, fid, action, local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + bool dynamic) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); +} + +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, + false); +} + static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, const char *mac, u16 fid, u16 lag_vid, bool adding, bool dynamic) @@ -903,6 +973,11 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -921,21 +996,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, return err; } -static int mlxsw_sp_port_kill_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_kill_vid(dev, 0, vid); - if (err) - return err; - } - - return 0; -} - static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool init) { @@ -943,12 +1003,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then prevent packets ingressing through the - * port with the specified VIDs from being trapped to CPU. - */ if (!init && !mlxsw_sp_port->bridged) - return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); + return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, false); @@ -970,21 +1026,7 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, } } - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - if (err) { - netdev_err(dev, "Failed to clear flooding\n"); - return err; - } - - for (vid = vid_begin; vid <= vid_end; vid++) { - /* Remove FID mapping in case of Virtual mode */ - err = mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); - if (err) { - netdev_err(dev, "Failed to unmap FID=%d", vid); - return err; - } - } + mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); out: /* Changing activity bits only if HW operation succeded */ @@ -1081,6 +1123,10 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -1118,7 +1164,8 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port *tmp; - u16 vport_fid = 0; + struct mlxsw_sp_fid *f; + u16 vport_fid; char *sfd_pl; char mac[ETH_ALEN]; u16 fid; @@ -1133,12 +1180,8 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (!sfd_pl) return -ENOMEM; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 tmp; - - tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); - vport_fid = mlxsw_sp_vfid_to_fid(tmp); - } + f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); + vport_fid = f ? f->fid : 0; mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { @@ -1310,11 +1353,10 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, } if (mlxsw_sp_fid_is_vfid(fid)) { - u16 vfid = mlxsw_sp_fid_to_vfid(fid); struct mlxsw_sp_port *mlxsw_sp_vport; - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, - vfid); + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, + fid); if (!mlxsw_sp_vport) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); goto just_remove; @@ -1370,11 +1412,10 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, } if (mlxsw_sp_fid_is_vfid(fid)) { - u16 vfid = mlxsw_sp_fid_to_vfid(fid); struct mlxsw_sp_port *mlxsw_sp_vport; - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, - vfid); + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, + fid); if (!mlxsw_sp_vport) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); goto just_remove; @@ -1495,14 +1536,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->fdb_notify.dw); } -static void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) -{ - u16 fid; - - for_each_set_bit(fid, mlxsw_sp->active_fids, VLAN_N_VID) - mlxsw_sp_fid_destroy(mlxsw_sp, fid); -} - int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { return mlxsw_sp_fdb_init(mlxsw_sp); @@ -1511,7 +1544,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - mlxsw_sp_fids_fini(mlxsw_sp); } int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 25f658b3849a..377daa4d509c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1541,6 +1541,7 @@ static struct mlxsw_config_profile mlxsw_sx_config_profile = { .type = MLXSW_PORT_SWID_TYPE_ETH, } }, + .resource_query_enable = 0, }; static struct mlxsw_driver mlxsw_sx_driver = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 53a9550be75e..470d7696e9fe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -54,6 +54,11 @@ enum { MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + MLXSW_TRAP_ID_ARPBC = 0x50, + MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, + MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, MLXSW_TRAP_ID_MAX = 0x1FF }; |