diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
59 files changed, 3550 insertions, 658 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 1486ce902a56..9ca3734ebb6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -4,6 +4,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" + depends on MAY_USE_DEVLINK depends on PCI select MLX4_CORE select PTP_1588_CLOCK diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 715de8affcc9..c7e939945259 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -182,10 +182,17 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) err = mlx4_reset_slave(dev); else err = mlx4_reset_master(dev); - BUG_ON(err != 0); + if (!err) { + mlx4_err(dev, "device was reset successfully\n"); + } else { + /* EEH could have disabled the PCI channel during reset. That's + * recoverable and the PCI error flow will handle it. + */ + if (!pci_channel_offline(dev->persist->pdev)) + BUG_ON(1); + } dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; - mlx4_err(dev, "device was reset successfully\n"); mutex_unlock(&persist->device_state_mutex); /* At that step HW was already reset, now notify clients */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index d48d5793407d..e94ca1c3fc7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2429,7 +2429,7 @@ err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); err_slaves: - while (--i) { + while (i--) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 3348e646db70..a849da92f857 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (timestamp_en) cq_context->flags |= cpu_to_be32(1 << 19); - cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); + cq_context->logsize_usrpage = + cpu_to_be32((ilog2(nent) << 24) | + mlx4_to_hw_uar_index(dev, uar->index)); cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 038f9ce391e6..1494997c4f7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; +#define MLX4_EN_WRAP_AROUND_SEC 10ULL + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? + max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; @@ -254,12 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; mdev->cycles.mask = CLOCKSOURCE_MASK(48); - /* Using shift to make calculation more accurate. Since current HW - * clock frequency is 427 MHz, and cycles are given using a 48 bits - * register, the biggest shift when calculating using u64, is 14 - * (max_cycles * multiplier < 2^64) - */ - mdev->cycles.shift = 14; + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); mdev->cycles.mult = clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index dd84cabb2a51..f69584a9b47f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -501,34 +501,30 @@ static u32 mlx4_en_autoneg_get(struct net_device *dev) return autoneg; } -static u32 ptys_get_supported_port(struct mlx4_ptys_reg *ptys_reg) +static void ptys2ethtool_update_supported_port(unsigned long *mask, + struct mlx4_ptys_reg *ptys_reg) { u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap); if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T) | MLX4_PROT_MASK(MLX4_1000BASE_T) | MLX4_PROT_MASK(MLX4_100BASE_TX))) { - return SUPPORTED_TP; - } - - if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) + __set_bit(ETHTOOL_LINK_MODE_TP_BIT, mask); + } else if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) | MLX4_PROT_MASK(MLX4_10GBASE_SR) | MLX4_PROT_MASK(MLX4_56GBASE_SR4) | MLX4_PROT_MASK(MLX4_40GBASE_CR4) | MLX4_PROT_MASK(MLX4_40GBASE_SR4) | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) { - return SUPPORTED_FIBRE; - } - - if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) + __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask); + } else if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) | MLX4_PROT_MASK(MLX4_40GBASE_KR4) | MLX4_PROT_MASK(MLX4_20GBASE_KR2) | MLX4_PROT_MASK(MLX4_10GBASE_KR) | MLX4_PROT_MASK(MLX4_10GBASE_KX4) | MLX4_PROT_MASK(MLX4_1000BASE_KX))) { - return SUPPORTED_Backplane; + __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mask); } - return 0; } static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg) @@ -574,122 +570,111 @@ static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg) enum ethtool_report { SUPPORTED = 0, ADVERTISED = 1, - SPEED = 2 }; +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + u32 speed; +}; + +static unsigned long *ptys2ethtool_link_mode(struct ptys2ethtool_config *cfg, + enum ethtool_report report) +{ + switch (report) { + case SUPPORTED: + return cfg->supported; + case ADVERTISED: + return cfg->advertised; + } + return NULL; +} + +#define MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_map[reg_]; \ + cfg->speed = speed_; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + /* Translates mlx4 link mode to equivalent ethtool Link modes/speed */ -static u32 ptys2ethtool_map[MLX4_LINK_MODES_SZ][3] = { - [MLX4_100BASE_TX] = { - SUPPORTED_100baseT_Full, - ADVERTISED_100baseT_Full, - SPEED_100 - }, - - [MLX4_1000BASE_T] = { - SUPPORTED_1000baseT_Full, - ADVERTISED_1000baseT_Full, - SPEED_1000 - }, - [MLX4_1000BASE_CX_SGMII] = { - SUPPORTED_1000baseKX_Full, - ADVERTISED_1000baseKX_Full, - SPEED_1000 - }, - [MLX4_1000BASE_KX] = { - SUPPORTED_1000baseKX_Full, - ADVERTISED_1000baseKX_Full, - SPEED_1000 - }, - - [MLX4_10GBASE_T] = { - SUPPORTED_10000baseT_Full, - ADVERTISED_10000baseT_Full, - SPEED_10000 - }, - [MLX4_10GBASE_CX4] = { - SUPPORTED_10000baseKX4_Full, - ADVERTISED_10000baseKX4_Full, - SPEED_10000 - }, - [MLX4_10GBASE_KX4] = { - SUPPORTED_10000baseKX4_Full, - ADVERTISED_10000baseKX4_Full, - SPEED_10000 - }, - [MLX4_10GBASE_KR] = { - SUPPORTED_10000baseKR_Full, - ADVERTISED_10000baseKR_Full, - SPEED_10000 - }, - [MLX4_10GBASE_CR] = { - SUPPORTED_10000baseKR_Full, - ADVERTISED_10000baseKR_Full, - SPEED_10000 - }, - [MLX4_10GBASE_SR] = { - SUPPORTED_10000baseKR_Full, - ADVERTISED_10000baseKR_Full, - SPEED_10000 - }, - - [MLX4_20GBASE_KR2] = { - SUPPORTED_20000baseMLD2_Full | SUPPORTED_20000baseKR2_Full, - ADVERTISED_20000baseMLD2_Full | ADVERTISED_20000baseKR2_Full, - SPEED_20000 - }, - - [MLX4_40GBASE_CR4] = { - SUPPORTED_40000baseCR4_Full, - ADVERTISED_40000baseCR4_Full, - SPEED_40000 - }, - [MLX4_40GBASE_KR4] = { - SUPPORTED_40000baseKR4_Full, - ADVERTISED_40000baseKR4_Full, - SPEED_40000 - }, - [MLX4_40GBASE_SR4] = { - SUPPORTED_40000baseSR4_Full, - ADVERTISED_40000baseSR4_Full, - SPEED_40000 - }, - - [MLX4_56GBASE_KR4] = { - SUPPORTED_56000baseKR4_Full, - ADVERTISED_56000baseKR4_Full, - SPEED_56000 - }, - [MLX4_56GBASE_CR4] = { - SUPPORTED_56000baseCR4_Full, - ADVERTISED_56000baseCR4_Full, - SPEED_56000 - }, - [MLX4_56GBASE_SR4] = { - SUPPORTED_56000baseSR4_Full, - ADVERTISED_56000baseSR4_Full, - SPEED_56000 - }, +static struct ptys2ethtool_config ptys2ethtool_map[MLX4_LINK_MODES_SZ]; + +void __init mlx4_en_init_ptys2ethtool_map(void) +{ + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_100BASE_TX, SPEED_100, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_CR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_KR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_SR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_KR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_CR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT); + MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_SR4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT); }; -static u32 ptys2ethtool_link_modes(u32 eth_proto, enum ethtool_report report) +static void ptys2ethtool_update_link_modes(unsigned long *link_modes, + u32 eth_proto, + enum ethtool_report report) { int i; - u32 link_modes = 0; - for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { if (eth_proto & MLX4_PROT_MASK(i)) - link_modes |= ptys2ethtool_map[i][report]; + bitmap_or(link_modes, link_modes, + ptys2ethtool_link_mode(&ptys2ethtool_map[i], + report), + __ETHTOOL_LINK_MODE_MASK_NBITS); } - return link_modes; } -static u32 ethtool2ptys_link_modes(u32 link_modes, enum ethtool_report report) +static u32 ethtool2ptys_link_modes(const unsigned long *link_modes, + enum ethtool_report report) { int i; u32 ptys_modes = 0; for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { - if (ptys2ethtool_map[i][report] & link_modes) + if (bitmap_intersects( + ptys2ethtool_link_mode(&ptys2ethtool_map[i], + report), + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= 1 << i; } return ptys_modes; @@ -702,14 +687,15 @@ static u32 speed2ptys_link_modes(u32 speed) u32 ptys_modes = 0; for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { - if (ptys2ethtool_map[i][SPEED] == speed) + if (ptys2ethtool_map[i].speed == speed) ptys_modes |= 1 << i; } return ptys_modes; } -static int ethtool_get_ptys_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int +ethtool_get_ptys_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *link_ksettings) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_ptys_reg ptys_reg; @@ -737,79 +723,102 @@ static int ethtool_get_ptys_settings(struct net_device *dev, en_dbg(DRV, priv, "ptys_reg.eth_proto_lp_adv %x\n", be32_to_cpu(ptys_reg.eth_proto_lp_adv)); - cmd->supported = 0; - cmd->advertising = 0; + /* reset supported/advertising masks */ + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - cmd->supported |= ptys_get_supported_port(&ptys_reg); + ptys2ethtool_update_supported_port(link_ksettings->link_modes.supported, + &ptys_reg); eth_proto = be32_to_cpu(ptys_reg.eth_proto_cap); - cmd->supported |= ptys2ethtool_link_modes(eth_proto, SUPPORTED); + ptys2ethtool_update_link_modes(link_ksettings->link_modes.supported, + eth_proto, SUPPORTED); eth_proto = be32_to_cpu(ptys_reg.eth_proto_admin); - cmd->advertising |= ptys2ethtool_link_modes(eth_proto, ADVERTISED); + ptys2ethtool_update_link_modes(link_ksettings->link_modes.advertising, + eth_proto, ADVERTISED); - cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - cmd->advertising |= (priv->prof->tx_pause) ? ADVERTISED_Pause : 0; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Pause); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Asym_Pause); - cmd->advertising |= (priv->prof->tx_pause ^ priv->prof->rx_pause) ? - ADVERTISED_Asym_Pause : 0; + if (priv->prof->tx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Pause); + if (priv->prof->tx_pause ^ priv->prof->rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Asym_Pause); - cmd->port = ptys_get_active_port(&ptys_reg); - cmd->transceiver = (SUPPORTED_TP & cmd->supported) ? - XCVR_EXTERNAL : XCVR_INTERNAL; + link_ksettings->base.port = ptys_get_active_port(&ptys_reg); if (mlx4_en_autoneg_get(dev)) { - cmd->supported |= SUPPORTED_Autoneg; - cmd->advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); } - cmd->autoneg = (priv->port_state.flags & MLX4_EN_PORT_ANC) ? + link_ksettings->base.autoneg + = (priv->port_state.flags & MLX4_EN_PORT_ANC) ? AUTONEG_ENABLE : AUTONEG_DISABLE; eth_proto = be32_to_cpu(ptys_reg.eth_proto_lp_adv); - cmd->lp_advertising = ptys2ethtool_link_modes(eth_proto, ADVERTISED); - cmd->lp_advertising |= (priv->port_state.flags & MLX4_EN_PORT_ANC) ? - ADVERTISED_Autoneg : 0; + ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising); + ptys2ethtool_update_link_modes( + link_ksettings->link_modes.lp_advertising, + eth_proto, ADVERTISED); + if (priv->port_state.flags & MLX4_EN_PORT_ANC) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); - cmd->phy_address = 0; - cmd->mdio_support = 0; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + link_ksettings->base.phy_address = 0; + link_ksettings->base.mdio_support = 0; + link_ksettings->base.eth_tp_mdix = ETH_TP_MDI_INVALID; + link_ksettings->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; return ret; } -static void ethtool_get_default_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static void +ethtool_get_default_link_ksettings( + struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { struct mlx4_en_priv *priv = netdev_priv(dev); int trans_type; - cmd->autoneg = AUTONEG_DISABLE; - cmd->supported = SUPPORTED_10000baseT_Full; - cmd->advertising = ADVERTISED_10000baseT_Full; - trans_type = priv->port_state.transceiver; + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + 10000baseT_Full); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, + 10000baseT_Full); + + trans_type = priv->port_state.transceiver; if (trans_type > 0 && trans_type <= 0xC) { - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_EXTERNAL; - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_FIBRE; + link_ksettings->base.port = PORT_FIBRE; + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); } else if (trans_type == 0x80 || trans_type == 0) { - cmd->port = PORT_TP; - cmd->transceiver = XCVR_INTERNAL; - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_TP; + link_ksettings->base.port = PORT_TP; + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); } else { - cmd->port = -1; - cmd->transceiver = -1; + link_ksettings->base.port = -1; } } -static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int +mlx4_en_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *link_ksettings) { struct mlx4_en_priv *priv = netdev_priv(dev); int ret = -EINVAL; @@ -822,16 +831,16 @@ static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) priv->port_state.flags & MLX4_EN_PORT_ANE); if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) - ret = ethtool_get_ptys_settings(dev, cmd); + ret = ethtool_get_ptys_link_ksettings(dev, link_ksettings); if (ret) /* ETH PROT CRTL is not supported or PTYS CMD failed */ - ethtool_get_default_settings(dev, cmd); + ethtool_get_default_link_ksettings(dev, link_ksettings); if (netif_carrier_ok(dev)) { - ethtool_cmd_speed_set(cmd, priv->port_state.link_speed); - cmd->duplex = DUPLEX_FULL; + link_ksettings->base.speed = priv->port_state.link_speed; + link_ksettings->base.duplex = DUPLEX_FULL; } else { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + link_ksettings->base.speed = SPEED_UNKNOWN; + link_ksettings->base.duplex = DUPLEX_UNKNOWN; } return 0; } @@ -855,21 +864,29 @@ static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed, return proto_admin; } -static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int +mlx4_en_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_ptys_reg ptys_reg; __be32 proto_admin; int ret; - u32 ptys_adv = ethtool2ptys_link_modes(cmd->advertising, ADVERTISED); - int speed = ethtool_cmd_speed(cmd); + u32 ptys_adv = ethtool2ptys_link_modes( + link_ksettings->link_modes.advertising, ADVERTISED); + const int speed = link_ksettings->base.speed; - en_dbg(DRV, priv, "Set Speed=%d adv=0x%x autoneg=%d duplex=%d\n", - speed, cmd->advertising, cmd->autoneg, cmd->duplex); + en_dbg(DRV, priv, + "Set Speed=%d adv={%*pbl} autoneg=%d duplex=%d\n", + speed, __ETHTOOL_LINK_MODE_MASK_NBITS, + link_ksettings->link_modes.advertising, + link_ksettings->base.autoneg, + link_ksettings->base.duplex); - if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) || - (cmd->duplex == DUPLEX_HALF)) + if (!(priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) || + (link_ksettings->base.duplex == DUPLEX_HALF)) return -EINVAL; memset(&ptys_reg, 0, sizeof(ptys_reg)); @@ -883,7 +900,7 @@ static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return 0; } - proto_admin = cmd->autoneg == AUTONEG_ENABLE ? + proto_admin = link_ksettings->base.autoneg == AUTONEG_ENABLE ? cpu_to_be32(ptys_adv) : speed_set_ptys_admin(priv, speed, ptys_reg.eth_proto_cap); @@ -1982,8 +1999,8 @@ static int mlx4_en_set_phys_id(struct net_device *dev, const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, - .get_settings = mlx4_en_get_settings, - .set_settings = mlx4_en_set_settings, + .get_link_ksettings = mlx4_en_get_link_ksettings, + .set_link_ksettings = mlx4_en_set_link_ksettings, .get_link = ethtool_op_get_link, .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index e0ec280a7fa1..bf7628db098a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -382,6 +382,7 @@ static void mlx4_en_verify_params(void) static int __init mlx4_en_init(void) { mlx4_en_verify_params(); + mlx4_en_init_ptys2ethtool_map(); return mlx4_register_interface(&mlx4_en_interface); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0c7e3f69a73b..b4b258c8ca47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -40,6 +40,7 @@ #include <net/ip.h> #include <net/busy_poll.h> #include <net/vxlan.h> +#include <net/devlink.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/device.h> @@ -69,6 +70,15 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return mlx4_en_setup_tc(dev, tc->tc); +} + #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -2024,8 +2034,11 @@ void mlx4_en_destroy_netdev(struct net_device *dev) en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); /* Unregister device - this will close the port if it was up */ - if (priv->registered) + if (priv->registered) { + devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, + priv->port)); unregister_netdev(dev); + } if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); @@ -2245,7 +2258,7 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) struct mlx4_en_dev *mdev = en_priv->mdev; u64 mac_u64 = mlx4_mac_to_u64(mac); - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) return -EINVAL; return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); @@ -2344,8 +2357,6 @@ out: /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2356,8 +2367,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2466,7 +2475,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@ -2504,7 +2513,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@ -2980,6 +2989,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL; @@ -3041,6 +3055,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } priv->registered = 1; + devlink_port_type_eth_set(mlx4_get_devlink_port(mdev->dev, priv->port), + dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index ee99e67187f5..3904b5fc0b7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -238,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = 0; stats->tx_aborted_errors = 0; stats->tx_carrier_errors = 0; stats->tx_fifo_errors = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 12aab5a659d3..02e925d6f734 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; } - context->usr_page = cpu_to_be32(mdev->priv_uar.index); + context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + mdev->priv_uar.index)); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 41440b2b20a3..86bcfe510e4e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -82,8 +82,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, /* Not doing get_page() for each frag is a big win * on asymetric workloads. Note we can not use atomic_set(). */ - atomic_add(page_alloc->page_size / frag_info->frag_stride - 1, - &page->_count); + page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); return 0; } @@ -127,7 +126,7 @@ out: dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, PCI_DMA_FROMDEVICE); page = page_alloc[i].page; - atomic_set(&page->_count, 1); + set_page_count(page, 1); put_page(page); } } @@ -165,7 +164,7 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", i, ring->page_alloc[i].page_size, - atomic_read(&ring->page_alloc[i].page->_count)); + page_ref_count(ring->page_alloc[i].page)); } return 0; @@ -177,7 +176,7 @@ out: dma_unmap_page(priv->ddev, page_alloc->dma, page_alloc->page_size, PCI_DMA_FROMDEVICE); page = page_alloc->page; - atomic_set(&page->_count, 1); + set_page_count(page, 1); put_page(page); page_alloc->page = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4421bf5463f6..c0d7b7296236 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, user_prio, &ring->context); if (ring->bf_alloced) - ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); + ring->context.usr_page = + cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + ring->bf.uar->index)); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); @@ -274,7 +276,8 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp) + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -345,7 +348,8 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, } } } - dev_consume_skb_any(skb); + napi_consume_skb(skb, napi_mode); + return tx_info->nr_txbb; } @@ -369,7 +373,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size), 0); + !!(ring->cons & ring->size), 0, + 0 /* Non-NAPI caller */); ring->cons += ring->last_nr_txbb; cnt++; } @@ -383,7 +388,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } static bool mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq) + struct mlx4_en_cq *cq, int napi_budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; @@ -449,7 +454,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & - ring->size), timestamp); + ring->size), timestamp, napi_budget); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring_cons + txbbs_stamp) & @@ -509,7 +514,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) struct mlx4_en_priv *priv = netdev_priv(dev); int clean_complete; - clean_complete = mlx4_en_process_tx_cq(dev, cq); + clean_complete = mlx4_en_process_tx_cq(dev, cq, budget); if (!clean_complete) return budget; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 4696053165f8..f613977455e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->persist->pdev, 2) + - ((eq->eqn / 4) << PAGE_SHIFT), - PAGE_SIZE); + ioremap( + pci_resource_start(dev->persist->pdev, 2) + + ((eq->eqn / 4) << (dev->uar_page_shift)), + (1 << (dev->uar_page_shift))); if (!priv->eq_table.uar_map[index]) { mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", eq->eqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d66c690a8597..e97094598b2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", - [33] = "RoCEv2 support" + [33] = "RoCEv2 support", + [34] = "DMFS Sniffer support (UC & MC)" }; int i; @@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 0472941af820..dec77d6f0ac9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/errno.h> +#include <net/devlink.h> #include "mlx4.h" @@ -249,3 +250,11 @@ void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int return result; } EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev); + +struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + + return &info->devlink_port; +} +EXPORT_SYMBOL_GPL(mlx4_get_devlink_port); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index f1b6d219e445..358f7230da58 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/kmod.h> +#include <net/devlink.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -104,6 +105,11 @@ module_param(enable_64b_cqe_eqe, bool, 0444); MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)"); +static bool enable_4k_uar; +module_param(enable_4k_uar, bool, 0444); +MODULE_PARM_DESC(enable_4k_uar, + "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)"); + #define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \ MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) @@ -168,6 +174,20 @@ struct mlx4_port_config { static atomic_t pf_loading = ATOMIC_INIT(0); +static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + /* The reserved_uars is calculated by system page size unit. + * Therefore, adjustment is added when the uar page size is less + * than the system page size + */ + dev->caps.reserved_uars = + max_t(int, + mlx4_get_num_reserved_uar(dev), + dev_cap->reserved_uars / + (1 << (PAGE_SHIFT - dev->uar_page_shift))); +} + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -386,8 +406,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; - /* The first 128 UARs are used for EQ doorbells */ - dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; @@ -405,6 +423,19 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + /* Save uar page shift */ + if (!mlx4_is_slave(dev)) { + /* Virtual PCI function needs to determine UAR page size from + * firmware. Only master PCI function can set the uar page size + */ + if (enable_4k_uar) + dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; + else + dev->uar_page_shift = PAGE_SHIFT; + + mlx4_set_num_reserved_uars(dev, dev_cap); + } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { struct mlx4_init_hca_param hca_param; @@ -815,16 +846,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } - /* slave gets uar page size from QUERY_HCA fw command */ - dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); + /* Set uar_page_shift for VF */ + dev->uar_page_shift = hca_param.uar_page_sz + 12; - /* TODO: relax this assumption */ - if (dev->caps.uar_page_size != PAGE_SIZE) { - mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n", - dev->caps.uar_page_size, PAGE_SIZE); - return -ENODEV; + /* Make sure the master uar page size is valid */ + if (dev->uar_page_shift > PAGE_SHIFT) { + mlx4_err(dev, + "Invalid configuration: uar page size is larger than system page size\n"); + return -ENODEV; } + /* Set reserved_uars based on the uar_page_shift */ + mlx4_set_num_reserved_uars(dev, &dev_cap); + + /* Although uar page size in FW differs from system page size, + * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) + * still works with assumption that uar page size == system page size + */ + dev->caps.uar_page_size = PAGE_SIZE; + memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { @@ -1051,36 +1091,20 @@ static ssize_t show_port_type(struct device *dev, return strlen(buf); } -static ssize_t set_port_type(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static int __set_port_type(struct mlx4_port_info *info, + enum mlx4_port_type port_type) { - struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, - port_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; enum mlx4_port_type new_types[MLX4_MAX_PORTS]; - static DEFINE_MUTEX(set_port_type_mutex); int i; int err = 0; - mutex_lock(&set_port_type_mutex); - - if (!strcmp(buf, "ib\n")) - info->tmp_type = MLX4_PORT_TYPE_IB; - else if (!strcmp(buf, "eth\n")) - info->tmp_type = MLX4_PORT_TYPE_ETH; - else if (!strcmp(buf, "auto\n")) - info->tmp_type = MLX4_PORT_TYPE_AUTO; - else { - mlx4_err(mdev, "%s is not supported port type\n", buf); - err = -EINVAL; - goto err_out; - } - mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); + info->tmp_type = port_type; + /* Possible type is always the one that was delivered */ mdev->caps.possible_type[info->port] = info->tmp_type; @@ -1122,6 +1146,37 @@ static ssize_t set_port_type(struct device *dev, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); + + return err; +} + +static ssize_t set_port_type(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + enum mlx4_port_type port_type; + static DEFINE_MUTEX(set_port_type_mutex); + int err; + + mutex_lock(&set_port_type_mutex); + + if (!strcmp(buf, "ib\n")) { + port_type = MLX4_PORT_TYPE_IB; + } else if (!strcmp(buf, "eth\n")) { + port_type = MLX4_PORT_TYPE_ETH; + } else if (!strcmp(buf, "auto\n")) { + port_type = MLX4_PORT_TYPE_AUTO; + } else { + mlx4_err(mdev, "%s is not supported port type\n", buf); + err = -EINVAL; + goto err_out; + } + + err = __set_port_type(info, port_type); + err_out: mutex_unlock(&set_port_type_mutex); @@ -1226,6 +1281,7 @@ err_set_port: static int mlx4_mf_bond(struct mlx4_dev *dev) { int err = 0; + int nvfs; struct mlx4_slaves_pport slaves_port1; struct mlx4_slaves_pport slaves_port2; DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); @@ -1242,11 +1298,18 @@ static int mlx4_mf_bond(struct mlx4_dev *dev) return -EINVAL; } + /* number of virtual functions is number of total functions minus one + * physical function for each port. + */ + nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + + bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2; + /* limit on maximum allowed VFs */ - if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + - bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) > - MAX_MF_BOND_ALLOWED_SLAVES) + if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) { + mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n", + nvfs, MAX_MF_BOND_ALLOWED_SLAVES); return -EINVAL; + } if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); @@ -2179,8 +2242,15 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - init_hca.log_uar_sz = ilog2(dev->caps.num_uars); - init_hca.uar_page_sz = PAGE_SHIFT - 12; + if (enable_4k_uar) { + init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; + init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; + } else { + init_hca.log_uar_sz = ilog2(dev->caps.num_uars); + init_hca.uar_page_sz = PAGE_SHIFT - 12; + } + init_hca.mw_enabled = 0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) @@ -2847,8 +2917,13 @@ no_msi: static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { + struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - int err = 0; + int err; + + err = devlink_port_register(devlink, &info->devlink_port, port); + if (err) + return err; info->dev = dev; info->port = port; @@ -2873,6 +2948,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -3644,23 +3720,54 @@ err_disable_pdev: return err; } +static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type port_type) +{ + struct mlx4_port_info *info = container_of(devlink_port, + struct mlx4_port_info, + devlink_port); + enum mlx4_port_type mlx4_port_type; + + switch (port_type) { + case DEVLINK_PORT_TYPE_AUTO: + mlx4_port_type = MLX4_PORT_TYPE_AUTO; + break; + case DEVLINK_PORT_TYPE_ETH: + mlx4_port_type = MLX4_PORT_TYPE_ETH; + break; + case DEVLINK_PORT_TYPE_IB: + mlx4_port_type = MLX4_PORT_TYPE_IB; + break; + default: + return -EOPNOTSUPP; + } + + return __set_port_type(info, mlx4_port_type); +} + +static const struct devlink_ops mlx4_devlink_ops = { + .port_type_set = mlx4_devlink_port_type_set, +}; + static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { + struct devlink *devlink; struct mlx4_priv *priv; struct mlx4_dev *dev; int ret; printk_once(KERN_INFO "%s", mlx4_version); - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); + if (!devlink) return -ENOMEM; + priv = devlink_priv(devlink); dev = &priv->dev; dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); if (!dev->persist) { - kfree(priv); - return -ENOMEM; + ret = -ENOMEM; + goto err_devlink_free; } dev->persist->pdev = pdev; dev->persist->dev = dev; @@ -3669,14 +3776,23 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->device_state_mutex); mutex_init(&dev->persist->interface_state_mutex); + ret = devlink_register(devlink, &pdev->dev); + if (ret) + goto err_persist_free; + ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) { - kfree(dev->persist); - kfree(priv); - } else { - pci_save_state(pdev); - } + if (ret) + goto err_devlink_unregister; + pci_save_state(pdev); + return 0; + +err_devlink_unregister: + devlink_unregister(devlink); +err_persist_free: + kfree(dev->persist); +err_devlink_free: + devlink_free(devlink); return ret; } @@ -3777,6 +3893,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; mutex_lock(&persist->interface_state_mutex); @@ -3807,8 +3924,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + devlink_unregister(devlink); kfree(dev->persist); - kfree(priv); + devlink_free(devlink); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e054647..42d8de892bfe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, + [MLX4_FS_MIRROR_RX_PORT] = 0x4, + [MLX4_FS_MIRROR_SX_PORT] = 0x5, + [MLX4_FS_UC_SNIFFER] = 0x6, + [MLX4_FS_MC_SNIFFER] = 0x7, }; int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7baef52db6b7..ef9683101ead 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -45,6 +45,7 @@ #include <linux/workqueue.h> #include <linux/interrupt.h> #include <linux/spinlock.h> +#include <net/devlink.h> #include <linux/mlx4/device.h> #include <linux/mlx4/driver.h> @@ -828,6 +829,7 @@ struct mlx4_port_info { struct mlx4_roce_gid_table gid_table; int base_qpn; struct cpu_rmap *rmap; + struct devlink_port devlink_port; }; struct mlx4_sense { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 35de7d2e6b34..d12ab6a73344 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -607,6 +607,7 @@ static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz) #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) +void mlx4_en_init_ptys2ethtool_map(void); void mlx4_en_update_loopback_state(struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 609c59dc854e..b3cc3ab63799 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free); int mlx4_init_uar_table(struct mlx4_dev *dev) { - if (dev->caps.num_uars <= 128) { - mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", - dev->caps.num_uars); + int num_reserved_uar = mlx4_get_num_reserved_uar(dev); + + mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift); + mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars); + + if (dev->caps.num_uars <= num_reserved_uar) { + mlx4_err( + dev, "Only %d UAR pages (need more than %d)\n", + dev->caps.num_uars, num_reserved_uar); mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 787b7bb54d52..211c65087997 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -193,10 +193,10 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) if (need_mf_bond) { if (port == 1) { mutex_lock(&table->mutex); - mutex_lock(&dup_table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&dup_table->mutex); - mutex_lock(&table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); } } else { mutex_lock(&table->mutex); @@ -389,10 +389,10 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) if (dup) { if (port == 1) { mutex_lock(&table->mutex); - mutex_lock(&dup_table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&dup_table->mutex); - mutex_lock(&table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); } } else { mutex_lock(&table->mutex); @@ -479,10 +479,10 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) if (dup) { if (port == 1) { mutex_lock(&table->mutex); - mutex_lock(&dup_table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&dup_table->mutex); - mutex_lock(&table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); } } else { mutex_lock(&table->mutex); @@ -588,10 +588,10 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, if (need_mf_bond) { if (port == 1) { mutex_lock(&table->mutex); - mutex_lock(&dup_table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&dup_table->mutex); - mutex_lock(&table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); } } else { mutex_lock(&table->mutex); @@ -764,10 +764,10 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) if (dup) { if (port == 1) { mutex_lock(&table->mutex); - mutex_lock(&dup_table->mutex); + mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&dup_table->mutex); - mutex_lock(&table->mutex); + mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING); } } else { mutex_lock(&table->mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b46dbe29ef6c..cd9b2b28df88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -915,11 +915,13 @@ static int handle_existing_counter(struct mlx4_dev *dev, u8 slave, int port, spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, counter_index, RES_COUNTER); - if (!r || r->owner != slave) + if (!r || r->owner != slave) { ret = -EINVAL; - counter = container_of(r, struct res_counter, com); - if (!counter->port) - counter->port = port; + } else { + counter = container_of(r, struct res_counter, com); + if (!counter->port) + counter->port = port; + } spin_unlock_irq(mlx4_tlock(dev)); return ret; @@ -3139,7 +3141,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, case QP_TRANS_RTS2RTS: case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: - if (slave != mlx4_master_func_num(dev)) + if (slave != mlx4_master_func_num(dev)) { if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) @@ -3158,6 +3160,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; } + } break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index c503ea05e742..1cf722eba607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -19,3 +19,15 @@ config MLX5_CORE_EN Ethernet support in Mellanox Technologies ConnectX-4 NIC. Ethernet and Infiniband support in ConnectX-4 are currently mutually exclusive. + +config MLX5_CORE_EN_DCB + bool "Data Center Bridging (DCB) Support" + default y + depends on MLX5_CORE_EN && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + If set to N, will not be able to configure QoS and ratelimit attributes. + This flag is depended on the kernel's DCB support. + + If unsure, set to Y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 01c0256effb8..4fc45ee0c5d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,6 +3,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o + mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o + en_txrx.o en_clock.o vxlan.o en_tc.o + +mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 037fc4cdf5af..97f5114fc113 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -560,6 +560,18 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ACCESS_REG: return "MLX5_CMD_OP_ACCESS_REG"; + case MLX5_CMD_OP_SET_WOL_ROL: + return "SET_WOL_ROL"; + + case MLX5_CMD_OP_QUERY_WOL_ROL: + return "QUERY_WOL_ROL"; + + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return "ADD_VXLAN_UDP_DPORT"; + + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + return "DELETE_VXLAN_UDP_DPORT"; + default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aac071a7e830..879e6276c473 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,6 +29,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef __MLX5_EN_H__ +#define __MLX5_EN_H__ #include <linux/if_vlan.h> #include <linux/etherdevice.h> @@ -38,8 +40,10 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/transobj.h> +#include <linux/rhashtable.h> #include "wq.h" #include "mlx5_core.h" @@ -69,6 +73,11 @@ #define MLX5E_NUM_MAIN_GROUPS 9 +#ifdef CONFIG_MLX5_CORE_EN_DCB +#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ +#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ +#endif + static const char vport_strings[][ETH_GSTRING_LEN] = { /* vport statistics */ "rx_packets", @@ -95,12 +104,15 @@ static const char vport_strings[][ETH_GSTRING_LEN] = { /* SW counters */ "tso_packets", "tso_bytes", + "tso_inner_packets", + "tso_inner_bytes", "lro_packets", "lro_bytes", "rx_csum_good", "rx_csum_none", "rx_csum_sw", "tx_csum_offload", + "tx_csum_inner", "tx_queue_stopped", "tx_queue_wake", "tx_queue_dropped", @@ -133,18 +145,21 @@ struct mlx5e_vport_stats { /* SW counters */ u64 tso_packets; u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; u64 lro_packets; u64 lro_bytes; u64 rx_csum_good; u64 rx_csum_none; u64 rx_csum_sw; u64 tx_csum_offload; + u64 tx_csum_inner; u64 tx_queue_stopped; u64 tx_queue_wake; u64 tx_queue_dropped; u64 rx_wqe_err; -#define NUM_VPORT_COUNTERS 32 +#define NUM_VPORT_COUNTERS 35 }; static const char pport_strings[][ETH_GSTRING_LEN] = { @@ -223,6 +238,7 @@ struct mlx5e_pport_stats { static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "packets", + "bytes", "csum_none", "csum_sw", "lro_packets", @@ -232,35 +248,46 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = { struct mlx5e_rq_stats { u64 packets; + u64 bytes; u64 csum_none; u64 csum_sw; u64 lro_packets; u64 lro_bytes; u64 wqe_err; -#define NUM_RQ_STATS 6 +#define NUM_RQ_STATS 7 }; static const char sq_stats_strings[][ETH_GSTRING_LEN] = { "packets", + "bytes", "tso_packets", "tso_bytes", + "tso_inner_packets", + "tso_inner_bytes", + "csum_offload_inner", + "nop", "csum_offload_none", "stopped", "wake", "dropped", - "nop" }; struct mlx5e_sq_stats { + /* commonly accessed in data path */ u64 packets; + u64 bytes; u64 tso_packets; u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_offload_inner; + u64 nop; + /* less likely accessed in data path */ u64 csum_offload_none; u64 stopped; u64 wake; u64 dropped; - u64 nop; -#define NUM_SQ_STATS 8 +#define NUM_SQ_STATS 12 }; struct mlx5e_stats { @@ -272,7 +299,6 @@ struct mlx5e_params { u8 log_sq_size; u8 log_rq_size; u16 num_channels; - u8 default_vlan_prio; u8 num_tc; u16 rx_cq_moderation_usec; u16 rx_cq_moderation_pkts; @@ -285,6 +311,9 @@ struct mlx5e_params { u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct ieee_ets ets; +#endif }; struct mlx5e_tstamp { @@ -304,14 +333,9 @@ enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, }; -enum cq_flags { - MLX5E_CQ_HAS_CQES = 1, -}; - struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; - unsigned long flags; /* data path - accessed per napi poll */ struct napi_struct *napi; @@ -364,6 +388,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, + MLX5E_SQ_STATE_BF_ENABLE, }; struct mlx5e_sq { @@ -392,7 +417,6 @@ struct mlx5e_sq { struct mlx5_wq_cyc wq; u32 dma_fifo_mask; void __iomem *uar_map; - void __iomem *uar_bf_map; struct netdev_queue *txq; u32 sqn; u16 bf_buf_size; @@ -452,6 +476,8 @@ enum mlx5e_traffic_types { MLX5E_NUM_TT, }; +#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY) + enum mlx5e_rqt_ix { MLX5E_INDIRECTION_RQT, MLX5E_SINGLE_RQ_RQT, @@ -491,21 +517,33 @@ struct mlx5e_vlan_db { bool filter_disabled; }; +struct mlx5e_vxlan_db { + spinlock_t lock; /* protect vxlan table */ + struct radix_tree_root tree; +}; + struct mlx5e_flow_table { int num_groups; struct mlx5_flow_table *t; struct mlx5_flow_group **g; }; +struct mlx5e_tc_flow_table { + struct mlx5_flow_table *t; + + struct rhashtable_params ht_params; + struct rhashtable ht; +}; + struct mlx5e_flow_tables { struct mlx5_flow_namespace *ns; + struct mlx5e_tc_flow_table tc; struct mlx5e_flow_table vlan; struct mlx5e_flow_table main; }; struct mlx5e_priv { /* priv data path fields - start */ - int default_vlan_prio; struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; /* priv data path fields - end */ @@ -515,7 +553,7 @@ struct mlx5e_priv { struct mlx5_uar cq_uar; u32 pdn; u32 tdn; - struct mlx5_core_mr mr; + struct mlx5_core_mkey mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; @@ -526,9 +564,9 @@ struct mlx5e_priv { struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; + struct mlx5e_vxlan_db vxlan; struct mlx5e_params params; - spinlock_t async_events_spinlock; /* sync hw events */ struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; struct delayed_work update_stats_work; @@ -591,7 +629,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); void mlx5e_completion_event(struct mlx5_core_cq *mcq); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); -bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); @@ -618,9 +656,12 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); +void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5e_tx_wqe *wqe, int bf_sz) @@ -636,16 +677,12 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, * doorbell */ wmb(); - - if (bf_sz) { - __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz); - - /* flush the write-combining mapped buffer */ - wmb(); - - } else { + if (bf_sz) + __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + else mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); - } + /* flush the write-combining mapped buffer */ + wmb(); sq->bf_offset ^= sq->bf_buf_size; } @@ -665,4 +702,11 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) } extern const struct ethtool_ops mlx5e_ethtool_ops; +#ifdef CONFIG_MLX5_CORE_EN_DCB +extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; +int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); +#endif + u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); + +#endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index be6543570b2b..2018eebe1531 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -62,10 +62,11 @@ static void mlx5e_timestamp_overflow(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, overflow_work); + unsigned long flags; - write_lock(&tstamp->lock); + write_lock_irqsave(&tstamp->lock, flags); timecounter_read(&tstamp->clock); - write_unlock(&tstamp->lock); + write_unlock_irqrestore(&tstamp->lock, flags); schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); } @@ -136,10 +137,11 @@ static int mlx5e_ptp_settime(struct ptp_clock_info *ptp, struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); u64 ns = timespec64_to_ns(ts); + unsigned long flags; - write_lock(&tstamp->lock); + write_lock_irqsave(&tstamp->lock, flags); timecounter_init(&tstamp->clock, &tstamp->cycles, ns); - write_unlock(&tstamp->lock); + write_unlock_irqrestore(&tstamp->lock, flags); return 0; } @@ -150,10 +152,11 @@ static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp, struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); u64 ns; + unsigned long flags; - write_lock(&tstamp->lock); + write_lock_irqsave(&tstamp->lock, flags); ns = timecounter_read(&tstamp->clock); - write_unlock(&tstamp->lock); + write_unlock_irqrestore(&tstamp->lock, flags); *ts = ns_to_timespec64(ns); @@ -164,10 +167,11 @@ static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); + unsigned long flags; - write_lock(&tstamp->lock); + write_lock_irqsave(&tstamp->lock, flags); timecounter_adjtime(&tstamp->clock, delta); - write_unlock(&tstamp->lock); + write_unlock_irqrestore(&tstamp->lock, flags); return 0; } @@ -176,6 +180,7 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) { u64 adj; u32 diff; + unsigned long flags; int neg_adj = 0; struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); @@ -189,11 +194,11 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock(&tstamp->lock); + write_lock_irqsave(&tstamp->lock, flags); timecounter_read(&tstamp->clock); tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff : tstamp->nominal_c_mult + diff; - write_unlock(&tstamp->lock); + write_unlock_irqrestore(&tstamp->lock, flags); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c new file mode 100644 index 000000000000..3036f279a8fd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/device.h> +#include <linux/netdevice.h> +#include "en.h" + +#define MLX5E_MAX_PRIORITY 8 + +#define MLX5E_100MB (100000) +#define MLX5E_1GB (1000000) + +static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -ENOTSUPP; + + memcpy(ets, &priv->params.ets, sizeof(*ets)); + return 0; +} + +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_ETS_TC_GROUP_NUM = 0, +}; + +static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) +{ + bool any_tc_mapped_to_ets = false; + int strict_group; + int i; + + for (i = 0; i <= max_tc; i++) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + any_tc_mapped_to_ets = true; + + strict_group = any_tc_mapped_to_ets ? 1 : 0; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_group[i] = strict_group++; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_group[i] = MLX5E_ETS_TC_GROUP_NUM; + break; + } + } +} + +static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, + u8 *tc_group, int max_tc) +{ + int i; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC; + break; + } + } +} + +int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + int max_tc = mlx5_max_tc(mdev); + int err; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -ENOTSUPP; + + mlx5e_build_tc_group(ets, tc_group, max_tc); + mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); + + err = mlx5_set_port_prio_tc(mdev, ets->prio_tc); + if (err) + return err; + + err = mlx5_set_port_tc_group(mdev, tc_group); + if (err) + return err; + + return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); +} + +static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) +{ + int bw_sum = 0; + int i; + + /* Validate Priority */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) + return -EINVAL; + } + + /* Validate Bandwidth Sum */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + bw_sum += ets->tc_tx_bw[i]; + } + + if (bw_sum != 0 && bw_sum != 100) + return -EINVAL; + return 0; +} + +static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_dbcnl_validate_ets(ets); + if (err) + return err; + + err = mlx5e_dcbnl_ieee_setets_core(priv, ets); + if (err) + return err; + + memcpy(&priv->params.ets, ets, sizeof(*ets)); + priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; + + return 0; +} + +static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + + return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); +} + +static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + enum mlx5_port_status ps; + u8 curr_pfc_en; + int ret; + + mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL); + + if (pfc->pfc_en == curr_pfc_en) + return 0; + + mlx5_query_port_admin_status(mdev, &ps); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + + ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); + + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + + return ret; +} + +static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + (mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_VER_IEEE) || + !(mode & DCB_CAP_DCBX_HOST)) + return 1; + + return 0; +} + +static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + int err; + int i; + + err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); + if (err) + return err; + + memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + switch (max_bw_unit[i]) { + case MLX5_100_MBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB; + break; + case MLX5_GBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB; + break; + case MLX5_BW_NO_LIMIT: + break; + default: + WARN(true, "non-supported BW unit"); + break; + } + } + + return 0; +} + +static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + int i; + + memset(max_bw_value, 0, sizeof(max_bw_value)); + memset(max_bw_unit, 0, sizeof(max_bw_unit)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + if (!maxrate->tc_maxrate[i]) { + max_bw_unit[i] = MLX5_BW_NO_LIMIT; + continue; + } + if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_100MB); + max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; + max_bw_unit[i] = MLX5_100_MBPS_UNIT; + } else { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_1GB); + max_bw_unit[i] = MLX5_GBPS_UNIT; + } + } + + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); +} + +const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { + .ieee_getets = mlx5e_dcbnl_ieee_getets, + .ieee_setets = mlx5e_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .getdcbx = mlx5e_dcbnl_getdcbx, + .setdcbx = mlx5e_dcbnl_setdcbx, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 65624ac65b4c..68834b715f6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -211,13 +211,14 @@ static void mlx5e_get_strings(struct net_device *dev, sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, rq_stats_strings[j]); - for (i = 0; i < priv->params.num_channels; i++) - for (tc = 0; tc < priv->params.num_tc; tc++) + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + - (idx++) * ETH_GSTRING_LEN, - "tx%d_%d_%s", i, tc, - sq_stats_strings[j]); + (idx++) * ETH_GSTRING_LEN, + "tx%d_%s", + priv->channeltc_to_txq_map[i][tc], + sq_stats_strings[j]); break; } } @@ -249,8 +250,8 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, &priv->state) ? 0 : ((u64 *)&priv->channel[i]->rq.stats)[j]; - for (i = 0; i < priv->params.num_channels; i++) - for (tc = 0; tc < priv->params.num_tc; tc++) + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) data[idx++] = !test_bit(MLX5E_STATE_OPENED, &priv->state) ? 0 : @@ -385,6 +386,8 @@ static int mlx5e_set_channels(struct net_device *dev, mlx5e_close_locked(dev); priv->params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); @@ -399,6 +402,9 @@ static int mlx5e_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + return -ENOTSUPP; + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; @@ -416,11 +422,18 @@ static int mlx5e_set_coalesce(struct net_device *netdev, int tc; int i; + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -ENOTSUPP; + + mutex_lock(&priv->state_lock); priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + for (i = 0; i < priv->params.num_channels; ++i) { c = priv->channel[i]; @@ -436,6 +449,8 @@ static int mlx5e_set_coalesce(struct net_device *netdev, coal->rx_max_coalesced_frames); } +out: + mutex_unlock(&priv->state_lock); return 0; } @@ -703,18 +718,36 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return 0; } +static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + int i; + + MLX5_SET(modify_tir_in, in, bitmask.hash, 1); + mlx5e_build_tir_ctx_hash(tirc, priv); + + for (i = 0; i < MLX5E_NUM_TT; i++) + if (IS_HASHING_TT(i)) + mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen); +} + static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); - bool close_open; - int err = 0; + int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_XOR) && (hfunc != ETH_RSS_HASH_TOP)) return -EINVAL; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + mutex_lock(&priv->state_lock); if (indir) { @@ -723,11 +756,6 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); } - close_open = (key || (hfunc != ETH_RSS_HASH_NO_CHANGE)) && - test_bit(MLX5E_STATE_OPENED, &priv->state); - if (close_open) - mlx5e_close_locked(dev); - if (key) memcpy(priv->params.toeplitz_hash_key, key, sizeof(priv->params.toeplitz_hash_key)); @@ -735,12 +763,13 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, if (hfunc != ETH_RSS_HASH_NO_CHANGE) priv->params.rss_hfunc = hfunc; - if (close_open) - err = mlx5e_open_locked(priv->netdev); + mlx5e_modify_tirs_hash(priv, in, inlen); mutex_unlock(&priv->state_lock); - return err; + kvfree(in); + + return 0; } static int mlx5e_get_rxnfc(struct net_device *netdev, @@ -884,6 +913,129 @@ static int mlx5e_get_ts_info(struct net_device *dev, return 0; } +static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) +{ + __u32 ret = 0; + + if (MLX5_CAP_GEN(mdev, wol_g)) + ret |= WAKE_MAGIC; + + if (MLX5_CAP_GEN(mdev, wol_s)) + ret |= WAKE_MAGICSECURE; + + if (MLX5_CAP_GEN(mdev, wol_a)) + ret |= WAKE_ARP; + + if (MLX5_CAP_GEN(mdev, wol_b)) + ret |= WAKE_BCAST; + + if (MLX5_CAP_GEN(mdev, wol_m)) + ret |= WAKE_MCAST; + + if (MLX5_CAP_GEN(mdev, wol_u)) + ret |= WAKE_UCAST; + + if (MLX5_CAP_GEN(mdev, wol_p)) + ret |= WAKE_PHY; + + return ret; +} + +static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) +{ + __u32 ret = 0; + + if (mode & MLX5_WOL_MAGIC) + ret |= WAKE_MAGIC; + + if (mode & MLX5_WOL_SECURED_MAGIC) + ret |= WAKE_MAGICSECURE; + + if (mode & MLX5_WOL_ARP) + ret |= WAKE_ARP; + + if (mode & MLX5_WOL_BROADCAST) + ret |= WAKE_BCAST; + + if (mode & MLX5_WOL_MULTICAST) + ret |= WAKE_MCAST; + + if (mode & MLX5_WOL_UNICAST) + ret |= WAKE_UCAST; + + if (mode & MLX5_WOL_PHY_ACTIVITY) + ret |= WAKE_PHY; + + return ret; +} + +static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode) +{ + u8 ret = 0; + + if (mode & WAKE_MAGIC) + ret |= MLX5_WOL_MAGIC; + + if (mode & WAKE_MAGICSECURE) + ret |= MLX5_WOL_SECURED_MAGIC; + + if (mode & WAKE_ARP) + ret |= MLX5_WOL_ARP; + + if (mode & WAKE_BCAST) + ret |= MLX5_WOL_BROADCAST; + + if (mode & WAKE_MCAST) + ret |= MLX5_WOL_MULTICAST; + + if (mode & WAKE_UCAST) + ret |= MLX5_WOL_UNICAST; + + if (mode & WAKE_PHY) + ret |= MLX5_WOL_PHY_ACTIVITY; + + return ret; +} + +static void mlx5e_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mlx5_wol_mode; + int err; + + memset(wol, 0, sizeof(*wol)); + + wol->supported = mlx5e_get_wol_supported(mdev); + if (!wol->supported) + return; + + err = mlx5_query_port_wol(mdev, &mlx5_wol_mode); + if (err) + return; + + wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode); +} + +static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + __u32 wol_supported = mlx5e_get_wol_supported(mdev); + u32 mlx5_wol_mode; + + if (!wol_supported) + return -ENOTSUPP; + + if (wol->wolopts & ~wol_supported) + return -EINVAL; + + mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts); + + return mlx5_set_port_wol(mdev, mlx5_wol_mode); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -908,4 +1060,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .get_wol = mlx5e_get_wol, + .set_wol = mlx5e_set_wol, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 80d81abc4820..d00a24203410 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6a3e430f1062..e0adb604f461 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,9 +30,14 @@ * SOFTWARE. */ +#include <net/tc_act/tc_gact.h> +#include <net/pkt_cls.h> #include <linux/mlx5/fs.h> +#include <net/vxlan.h> #include "en.h" +#include "en_tc.h" #include "eswitch.h" +#include "vxlan.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -141,11 +146,18 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) return; /* Collect firts the SW counters and then HW for consistency */ + s->rx_packets = 0; + s->rx_bytes = 0; + s->tx_packets = 0; + s->tx_bytes = 0; s->tso_packets = 0; s->tso_bytes = 0; + s->tso_inner_packets = 0; + s->tso_inner_bytes = 0; s->tx_queue_stopped = 0; s->tx_queue_wake = 0; s->tx_queue_dropped = 0; + s->tx_csum_inner = 0; tx_offload_none = 0; s->lro_packets = 0; s->lro_bytes = 0; @@ -155,6 +167,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; s->lro_packets += rq_stats->lro_packets; s->lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; @@ -164,11 +178,16 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; s->tso_packets += sq_stats->tso_packets; s->tso_bytes += sq_stats->tso_bytes; + s->tso_inner_packets += sq_stats->tso_inner_packets; + s->tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; + s->tx_csum_inner += sq_stats->csum_offload_inner; tx_offload_none += sq_stats->csum_offload_none; } } @@ -225,25 +244,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); - s->rx_packets = - s->rx_unicast_packets + - s->rx_multicast_packets + - s->rx_broadcast_packets; - s->rx_bytes = - s->rx_unicast_bytes + - s->rx_multicast_bytes + - s->rx_broadcast_bytes; - s->tx_packets = - s->tx_unicast_packets + - s->tx_multicast_packets + - s->tx_broadcast_packets; - s->tx_bytes = - s->tx_unicast_bytes + - s->tx_multicast_bytes + - s->tx_broadcast_bytes; - /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none; + s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; s->rx_csum_good = s->rx_packets - s->rx_csum_none - s->rx_csum_sw; @@ -267,9 +269,14 @@ static void mlx5e_update_stats_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void __mlx5e_async_event(struct mlx5e_priv *priv, - enum mlx5_dev_event event) +static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) { + struct mlx5e_priv *priv = vpriv; + + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + return; + switch (event) { case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: @@ -281,17 +288,6 @@ static void __mlx5e_async_event(struct mlx5e_priv *priv, } } -static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, - enum mlx5_dev_event event, unsigned long param) -{ - struct mlx5e_priv *priv = vpriv; - - spin_lock(&priv->async_events_spinlock); - if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) - __mlx5e_async_event(priv, event); - spin_unlock(&priv->async_events_spinlock); -} - static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); @@ -299,9 +295,8 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - spin_lock_irq(&priv->async_events_spinlock); clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); - spin_unlock_irq(&priv->async_events_spinlock); + synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) @@ -547,7 +542,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, int txq_ix; int err; - err = mlx5_alloc_map_uar(mdev, &sq->uar); + err = mlx5_alloc_map_uar(mdev, &sq->uar, true); if (err) return err; @@ -559,8 +554,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - sq->uar_map = sq->uar.map; - sq->uar_bf_map = sq->uar.bf_map; + if (sq->uar.bf_map) { + set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); + sq->uar_map = sq->uar.bf_map; + } else { + sq->uar_map = sq->uar.map; + } sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; @@ -869,12 +868,10 @@ static int mlx5e_open_cq(struct mlx5e_channel *c, if (err) goto err_destroy_cq; - err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation_usecs, - moderation_frames); - if (err) - goto err_destroy_cq; - + if (MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, + moderation_usecs, + moderation_frames); return 0; err_destroy_cq: @@ -982,7 +979,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mr.key); + c->mkey_be = cpu_to_be32(priv->mkey.key); c->num_tc = priv->params.num_tc; mlx5e_build_channeltc_to_txq_map(priv, ix); @@ -1063,6 +1060,15 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, param->wq.linear = 1; } +static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); +} + static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { @@ -1199,7 +1205,6 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - ix = ix % priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], test_bit(MLX5E_STATE_OPENED, &priv->state) ? priv->channel[ix]->rq.rqn : @@ -1317,7 +1322,22 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) lro_timer_supported_periods[2])); } -static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) +void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) +{ + MLX5_SET(tirc, tirc, rx_hash_fn, + mlx5e_rx_hash_fn(priv->params.rss_hfunc)); + if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, priv->params.toeplitz_hash_key, len); + } +} + +static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1325,6 +1345,7 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) void *tirc; int inlen; int err; + int tt; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1336,7 +1357,11 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + if (err) + break; + } kvfree(in); @@ -1400,6 +1425,24 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) return 0; } +static void mlx5e_netdev_set_tcs(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int nch = priv->params.num_channels; + int ntc = priv->params.num_tc; + int tc; + + netdev_reset_tc(netdev); + + if (ntc == 1) + return; + + netdev_set_num_tc(netdev, ntc); + + for (tc = 0; tc < ntc; tc++) + netdev_set_tc_queue(netdev, tc, nch, tc * nch); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1408,6 +1451,8 @@ int mlx5e_open_locked(struct net_device *netdev) set_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_netdev_set_tcs(netdev); + num_txqs = priv->params.num_channels * priv->params.num_tc; netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); @@ -1430,8 +1475,8 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_close_channels; } - mlx5e_update_carrier(priv); mlx5e_redirect_rqts(priv); + mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); schedule_delayed_work(&priv->update_stats_work, 0); @@ -1470,8 +1515,8 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_timestamp_cleanup(priv); - mlx5e_redirect_rqts(priv); netif_carrier_off(priv->netdev); + mlx5e_redirect_rqts(priv); mlx5e_close_channels(priv); return 0; @@ -1553,8 +1598,7 @@ static int mlx5e_open_drop_rq(struct mlx5e_priv *priv) memset(&cq_param, 0, sizeof(cq_param)); memset(&rq_param, 0, sizeof(rq_param)); - mlx5e_build_rx_cq_param(priv, &cq_param); - mlx5e_build_rq_param(priv, &rq_param); + mlx5e_build_drop_rq_param(&rq_param); err = mlx5e_create_drop_cq(priv, cq, &cq_param); if (err) @@ -1602,7 +1646,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, prio, tc); + MLX5_SET(tisc, tisc, prio, tc << 1); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); @@ -1618,7 +1662,7 @@ static int mlx5e_create_tises(struct mlx5e_priv *priv) int err; int tc; - for (tc = 0; tc < priv->params.num_tc; tc++) { + for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) { err = mlx5e_create_tis(priv, tc); if (err) goto err_close_tises; @@ -1637,7 +1681,7 @@ static void mlx5e_destroy_tises(struct mlx5e_priv *priv) { int tc; - for (tc = 0; tc < priv->params.num_tc; tc++) + for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) mlx5e_destroy_tis(priv, tc); } @@ -1672,17 +1716,7 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) default: MLX5_SET(tirc, tirc, indirect_table, priv->rqtn[MLX5E_INDIRECTION_RQT]); - MLX5_SET(tirc, tirc, rx_hash_fn, - mlx5e_rx_hash_fn(priv->params.rss_hfunc)); - if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { - void *rss_key = MLX5_ADDR_OF(tirc, tirc, - rx_hash_toeplitz_key); - size_t len = MLX5_FLD_SZ_BYTES(tirc, - rx_hash_toeplitz_key); - - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, priv->params.toeplitz_hash_key, len); - } + mlx5e_build_tir_ctx_hash(tirc, priv); break; } @@ -1824,6 +1858,58 @@ static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv, i); } +static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool was_opened; + int err = 0; + + if (tc && tc != MLX5E_MAX_NUM_TC) + return -EINVAL; + + mutex_lock(&priv->state_lock); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params.num_tc = tc ? tc : 1; + + if (was_opened) + err = mlx5e_open_locked(priv->netdev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + goto mqprio; + + switch (tc->type) { + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, tc->cls_flower); + } + default: + return -EOPNOTSUPP; + } + +mqprio: + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return mlx5e_setup_tc(dev, tc->tc); +} + static struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -1885,8 +1971,10 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP); + err = mlx5e_modify_tirs_lro(priv); + if (err) + mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", + err); if (was_opened) err = mlx5e_open_locked(priv->netdev); @@ -1901,6 +1989,13 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_disable_vlan_filter(priv); } + if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) && + mlx5e_tc_num_filters(priv)) { + netdev_err(netdev, + "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; + } + return err; } @@ -2024,18 +2119,116 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -static struct net_device_ops mlx5e_netdev_ops = { +static void mlx5e_add_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!mlx5e_vxlan_allowed(priv->mdev)) + return; + + mlx5e_vxlan_add_port(priv, be16_to_cpu(port)); +} + +static void mlx5e_del_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!mlx5e_vxlan_allowed(priv->mdev)) + return; + + mlx5e_vxlan_del_port(priv, be16_to_cpu(port)); +} + +static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) +{ + struct udphdr *udph; + u16 proto; + u16 port = 0; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + proto = ipv6_hdr(skb)->nexthdr; + break; + default: + goto out; + } + + if (proto == IPPROTO_UDP) { + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + } + + /* Verify if UDP port is being offloaded by HW */ + if (port && mlx5e_vxlan_lookup_port(priv, port)) + return features; + +out: + /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +static netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && + (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) + return mlx5e_vxlan_features_check(priv, skb, features); + + return features; +} + +static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_ndo_setup_tc, + .ndo_select_queue = mlx5e_select_queue, .ndo_get_stats64 = mlx5e_get_stats, .ndo_set_rx_mode = mlx5e_set_rx_mode, .ndo_set_mac_address = mlx5e_set_mac, - .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, - .ndo_do_ioctl = mlx5e_ioctl, + .ndo_change_mtu = mlx5e_change_mtu, + .ndo_do_ioctl = mlx5e_ioctl, +}; + +static const struct net_device_ops mlx5e_netdev_ops_sriov = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_ndo_setup_tc, + .ndo_select_queue = mlx5e_select_queue, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_change_mtu = mlx5e_change_mtu, + .ndo_do_ioctl = mlx5e_ioctl, + .ndo_add_vxlan_port = mlx5e_add_vxlan_port, + .ndo_del_vxlan_port = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_set_vf_link_state = mlx5e_set_vf_link_state, + .ndo_get_vf_stats = mlx5e_get_vf_stats, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2057,6 +2250,8 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) } if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_warn(mdev, "CQ modiration is not supported\n"); return 0; } @@ -2070,12 +2265,38 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } +#ifdef CONFIG_MLX5_CORE_EN_DCB +static void mlx5e_ets_init(struct mlx5e_priv *priv) +{ + int i; + + priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < priv->params.ets.ets_cap; i++) { + priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + priv->params.ets.prio_tc[i] = i; + } + + /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ + priv->params.ets.prio_tc[0] = 1; + priv->params.ets.prio_tc[1] = 0; +} +#endif + +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels) +{ + int i; + + for (i = 0; i < len; i++) + indirection_rqt[i] = i % num_channels; +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) { struct mlx5e_priv *priv = netdev_priv(netdev); - int i; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; @@ -2093,14 +2314,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.num_tc = 1; - priv->params.default_vlan_prio = 0; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) - priv->params.indirection_rqt[i] = i % num_channels; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -2108,9 +2328,11 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->mdev = mdev; priv->netdev = netdev; priv->params.num_channels = num_channels; - priv->default_vlan_prio = priv->params.default_vlan_prio; - spin_lock_init(&priv->async_events_spinlock); +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_ets_init(priv); +#endif + mutex_init(&priv->state_lock); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); @@ -2137,18 +2359,15 @@ static void mlx5e_build_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, &mdev->pdev->dev); - if (priv->params.num_tc > 1) - mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; - mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; - mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; - mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; - mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + netdev->netdev_ops = &mlx5e_netdev_ops_sriov; +#ifdef CONFIG_MLX5_CORE_EN_DCB + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +#endif + } else { + netdev->netdev_ops = &mlx5e_netdev_ops_basic; } - netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ; netdev->ethtool_ops = &mlx5e_ethtool_ops; @@ -2170,10 +2389,27 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_IP_CSUM; + netdev->hw_enc_features |= NETIF_F_RXCSUM; + netdev->hw_enc_features |= NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_RXHASH; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + } + netdev->features = netdev->hw_features; if (!priv->params.lro_en) netdev->features &= ~NETIF_F_LRO; +#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) + if (FT_CAP(flow_modify_en) && + FT_CAP(modify_root) && + FT_CAP(identified_miss_table_mode) && + FT_CAP(flow_table_modify)) + priv->netdev->hw_features |= NETIF_F_HW_TC; + netdev->features |= NETIF_F_HIGHDMA; netdev->priv_flags |= IFF_UNICAST_FLT; @@ -2182,7 +2418,7 @@ static void mlx5e_build_netdev(struct net_device *netdev) } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mr *mr) + struct mlx5_core_mkey *mkey) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_create_mkey_mbox_in *in; @@ -2198,7 +2434,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, NULL); kvfree(in); @@ -2216,7 +2452,9 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) if (mlx5e_check_required_hca_cap(mdev)) return NULL; - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch, nch); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + nch * MLX5E_MAX_NUM_TC, + nch); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; @@ -2229,7 +2467,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) priv = netdev_priv(netdev); - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); + err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); if (err) { mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); goto err_free_netdev; @@ -2247,7 +2485,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_pd; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); if (err) { mlx5_core_err(mdev, "create mkey failed, %d\n", err); goto err_dealloc_transport_domain; @@ -2291,17 +2529,33 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) mlx5e_init_eth_addr(priv); + mlx5e_vxlan_init(priv); + + err = mlx5e_tc_init(priv); + if (err) + goto err_destroy_flow_tables; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); +#endif + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_destroy_flow_tables; + goto err_tc_cleanup; } + if (mlx5e_vxlan_allowed(mdev)) + vxlan_get_rx_port(netdev); + mlx5e_enable_async_events(priv); schedule_work(&priv->set_rx_mode_work); return priv; +err_tc_cleanup: + mlx5e_tc_cleanup(priv); + err_destroy_flow_tables: mlx5e_destroy_flow_tables(priv); @@ -2321,7 +2575,7 @@ err_destroy_tises: mlx5e_destroy_tises(priv); err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mr); + mlx5_core_destroy_mkey(mdev, &priv->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, priv->tdn); @@ -2349,13 +2603,15 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_disable_async_events(priv); flush_scheduled_work(); unregister_netdev(netdev); + mlx5e_tc_cleanup(priv); + mlx5e_vxlan_cleanup(priv); mlx5e_destroy_flow_tables(priv); mlx5e_destroy_tirs(priv); mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dd959d929aad..58d4e2f962c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -35,6 +35,7 @@ #include <linux/tcp.h> #include <net/busy_poll.h> #include "en.h" +#include "en_tc.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -167,14 +168,15 @@ static inline bool is_first_ethertype_ip(struct sk_buff *skb) static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) + struct sk_buff *skb, + bool lro) { if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; - if (likely(cqe->hds_ip_ext & CQE_L4_OK)) { + if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (is_first_ethertype_ip(skb)) { + } else if (likely(is_first_ethertype_ip(skb))) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); rq->stats.csum_sw++; @@ -211,7 +213,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_rx_hw_stamp(tstamp))) mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); - mlx5e_handle_csum(netdev, cqe, rq, skb); + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); skb->protocol = eth_type_trans(skb, netdev); @@ -223,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (cqe_has_vlan(cqe)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->vlan_info)); + + skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) @@ -230,10 +234,6 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done; - /* avoid accessing cq (dma coherent memory) if not needed */ - if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) - return 0; - for (work_done = 0; work_done < budget; work_done++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; @@ -267,6 +267,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5e_build_rx_skb(cqe, rq, skb); rq->stats.packets++; + rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); napi_gro_receive(cq->napi, skb); wq_ll_pop: @@ -279,8 +280,5 @@ wq_ll_pop: /* ensure cq space is freed before enabling more cqes */ wmb(); - if (work_done == budget) - set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); - return work_done; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c new file mode 100644 index 000000000000..b3de09f13425 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/flow_dissector.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_skbedit.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include <linux/rhashtable.h> +#include "en.h" +#include "en_tc.h" + +struct mlx5e_tc_flow { + struct rhash_head node; + u64 cookie; + struct mlx5_flow_rule *rule; +}; + +#define MLX5E_TC_FLOW_TABLE_NUM_ENTRIES 1024 +#define MLX5E_TC_FLOW_TABLE_NUM_GROUPS 4 + +static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, + u32 *match_c, u32 *match_v, + u32 action, u32 flow_tag) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + {.ft = priv->fts.vlan.t}, + }; + struct mlx5_flow_rule *rule; + bool table_created = false; + + if (IS_ERR_OR_NULL(priv->fts.tc.t)) { + priv->fts.tc.t = + mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0, + MLX5E_TC_FLOW_TABLE_NUM_ENTRIES, + MLX5E_TC_FLOW_TABLE_NUM_GROUPS); + if (IS_ERR(priv->fts.tc.t)) { + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + return ERR_CAST(priv->fts.tc.t); + } + + table_created = true; + } + + rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS, + match_c, match_v, + action, flow_tag, + action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL); + + if (IS_ERR(rule) && table_created) { + mlx5_destroy_flow_table(priv->fts.tc.t); + priv->fts.tc.t = NULL; + } + + return rule; +} + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5_flow_rule *rule) +{ + mlx5_del_flow_rule(rule); + + if (!mlx5e_tc_num_filters(priv)) { + mlx5_destroy_flow_table(priv->fts.tc.t); + priv->fts.tc.t = NULL; + } +} + +static int parse_cls_flower(struct mlx5e_priv *priv, + u32 *match_c, u32 *match_v, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + addr_type = key->addr_type; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + ip_proto = key->ip_proto; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(key->n_proto)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + key->ip_proto); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + mask->dst); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + key->dst); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + mask->src); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + key->src); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &mask->src, sizeof(mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &key->src, sizeof(key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &mask->dst, sizeof(mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &key->dst, sizeof(key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, sizeof(mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, sizeof(key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, sizeof(mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, sizeof(key->dst)); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(key->dst)); + break; + default: + netdev_err(priv->netdev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + } + + return 0; +} + +static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *flow_tag) +{ + const struct tc_action *a; + + if (tc_no_actions(exts)) + return -EINVAL; + + *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + *action = 0; + + tc_for_each_action(a, exts) { + /* Only support a single action per rule */ + if (*action) + return -EINVAL; + + if (is_tcf_gact_shot(a)) { + *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + continue; + } + + if (is_tcf_skbedit_mark(a)) { + u32 mark = tcf_skbedit_mark(a); + + if (mark & ~MLX5E_TC_FLOW_ID_MASK) { + netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n", + mark); + return -EINVAL; + } + + *flow_tag = mark; + *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + + return 0; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, + struct tc_cls_flower_offload *f) +{ + struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + u32 *match_c; + u32 *match_v; + int err = 0; + u32 flow_tag; + u32 action; + struct mlx5e_tc_flow *flow; + struct mlx5_flow_rule *old = NULL; + + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, + tc->ht_params); + if (flow) + old = flow->rule; + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_c || !match_v || !flow) { + err = -ENOMEM; + goto err_free; + } + + flow->cookie = f->cookie; + + err = parse_cls_flower(priv, match_c, match_v, f); + if (err < 0) + goto err_free; + + err = parse_tc_actions(priv, f->exts, &action, &flow_tag); + if (err < 0) + goto err_free; + + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + goto err_free; + + flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action, + flow_tag); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + goto err_hash_del; + } + + if (old) + mlx5e_tc_del_flow(priv, old); + + goto out; + +err_hash_del: + rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); + +err_free: + if (!old) + kfree(flow); +out: + kfree(match_c); + kfree(match_v); + return err; +} + +int mlx5e_delete_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct mlx5e_tc_flow *flow; + struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, + tc->ht_params); + if (!flow) + return -EINVAL; + + rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); + + mlx5e_tc_del_flow(priv, flow->rule); + + kfree(flow); + + return 0; +} + +static const struct rhashtable_params mlx5e_tc_flow_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_flow, node), + .key_offset = offsetof(struct mlx5e_tc_flow, cookie), + .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +int mlx5e_tc_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + + tc->ht_params = mlx5e_tc_flow_ht_params; + return rhashtable_init(&tc->ht, &tc->ht_params); +} + +static void _mlx5e_tc_del_flow(void *ptr, void *arg) +{ + struct mlx5e_tc_flow *flow = ptr; + struct mlx5e_priv *priv = arg; + + mlx5e_tc_del_flow(priv, flow->rule); + kfree(flow); +} + +void mlx5e_tc_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv); + + if (!IS_ERR_OR_NULL(priv->fts.tc.t)) { + mlx5_destroy_flow_table(priv->fts.tc.t); + priv->fts.tc.t = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h new file mode 100644 index 000000000000..d677428dc10f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_EN_TC_H__ +#define __MLX5_EN_TC_H__ + +#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff + +int mlx5e_tc_init(struct mlx5e_priv *priv); +void mlx5e_tc_cleanup(struct mlx5e_priv *priv); + +int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, + struct tc_cls_flower_offload *f); +int mlx5e_delete_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f); + +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) +{ + return atomic_read(&priv->fts.tc.ht.nelems); +} + +#endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2c3fba0fff54..1ffc7cb6f78c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -109,12 +109,10 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); - int up = skb_vlan_tag_present(skb) ? - skb->vlan_tci >> VLAN_PRIO_SHIFT : - priv->default_vlan_prio; - int tc = netdev_get_prio_tc_map(dev, up); + int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ? + skb->vlan_tci >> VLAN_PRIO_SHIFT : 0; - return priv->channeltc_to_txq_map[channel_ix][tc]; + return priv->channeltc_to_txq_map[channel_ix][up]; } static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, @@ -179,6 +177,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) unsigned int skb_len = skb->len; u8 opcode = MLX5_OPCODE_SEND; dma_addr_t dma_addr = 0; + unsigned int num_bytes; bool bf = false; u16 headlen; u16 ds_cnt; @@ -187,9 +186,16 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) memset(wqe, 0, sizeof(*wqe)); - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) - eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; - else + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + sq->stats.csum_offload_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + } + } else sq->stats.csum_offload_none++; if (sq->cc != sq->prev_cc) { @@ -198,24 +204,30 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } if (skb_is_gso(skb)) { - u32 payload_len; - eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); opcode = MLX5_OPCODE_LSO; - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload_len = skb->len - ihs; - wi->num_bytes = skb->len + - (skb_shinfo(skb)->gso_segs - 1) * ihs; - sq->stats.tso_packets++; - sq->stats.tso_bytes += payload_len; + + if (skb->encapsulation) { + ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + sq->stats.tso_inner_packets++; + sq->stats.tso_inner_bytes += skb->len - ihs; + } else { + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + sq->stats.tso_packets++; + sq->stats.tso_bytes += skb->len - ihs; + } + + num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; } else { bf = sq->bf_budget && !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); - wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } + wi->num_bytes = num_bytes; + if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data, &skb_len); @@ -293,7 +305,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; - if (bf && sq->uar_bf_map) + if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state)) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; @@ -307,6 +319,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->bf_budget = bf ? sq->bf_budget - 1 : 0; sq->stats.packets++; + sq->stats.bytes += num_bytes; return NETDEV_TX_OK; dma_unmap_wqe_err: @@ -326,7 +339,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } -bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq *sq; u32 dma_fifo_cc; @@ -335,10 +348,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) u16 sqcc; int i; - /* avoid accessing cq (dma coherent memory) if not needed */ - if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) - return false; - sq = container_of(cq, struct mlx5e_sq, cq); npkts = 0; @@ -402,7 +411,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) npkts++; nbytes += wi->num_bytes; sqcc += wi->num_wqebbs; - dev_kfree_skb(skb); + napi_consume_skb(skb, napi_budget); } while (!last_wqe); } @@ -422,10 +431,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) netif_tx_wake_queue(sq->txq); sq->stats.wake++; } - if (i == MLX5E_TX_CQ_POLL_BUDGET) { - set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); - return true; - } - return false; + return (i == MLX5E_TX_CQ_POLL_BUDGET); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 4ac8d716dbdd..9bb4395aceeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -60,7 +60,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); for (i = 0; i < c->num_tc; i++) - busy |= mlx5e_poll_tx_cq(&c->sq[i].cq); + busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; @@ -88,7 +88,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); - set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); barrier(); napi_schedule(cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 647a3ca2c2a9..18fccec72c5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -442,6 +442,11 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); +u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx) +{ + return dev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector; +} + int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a9894d2e8e26..f46f1db0fc00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); - in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); - list_for_each_entry(dst, &fte->node.children, node.list) { - unsigned int id; - - MLX5_SET(dest_format_struct, in_dests, destination_type, - dst->dest_attr.type); - if (dst->dest_attr.type == - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - id = dst->dest_attr.ft->id; - else - id = dst->dest_attr.tir_num; - MLX5_SET(dest_format_struct, in_dests, destination_id, id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + list_for_each_entry(dst, &fte->node.children, node.list) { + unsigned int id; + + MLX5_SET(dest_format_struct, in_dests, destination_type, + dst->dest_attr.type); + if (dst->dest_attr.type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + id = dst->dest_attr.ft->id; + } else { + id = dst->dest_attr.tir_num; + } + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + } } memset(out, 0, sizeof(out)); err = mlx5_cmd_exec_check_status(dev, in, inlen, out, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6f68dba8d7ed..5121be4675d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -73,10 +73,13 @@ #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ LEFTOVERS_MAX_FT) -#define KERNEL_MAX_FT 2 -#define KERNEL_NUM_PRIOS 1 +#define KERNEL_MAX_FT 3 +#define KERNEL_NUM_PRIOS 2 #define KENREL_MIN_LEVEL 2 +#define ANCHOR_MAX_FT 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { size_t arr_sz; long *caps; @@ -92,7 +95,7 @@ static struct init_tree_node { int max_ft; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -108,6 +111,8 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), } }; @@ -196,8 +201,10 @@ static void tree_put_node(struct fs_node *node) static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) - return -EPERM; + if (atomic_read(&node->refcount) > 1) { + atomic_dec(&node->refcount); + return -EEXIST; + } tree_put_node(node); return 0; } @@ -360,8 +367,13 @@ static void del_rule(struct fs_node *node) memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); list_del(&rule->node.list); - fte->dests_size--; - if (fte->dests_size) { + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } + if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + --fte->dests_size) { err = mlx5_cmd_update_fte(dev, ft, fg->id, fte); if (err) @@ -465,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; ft->max_fte = max_fte; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); return ft; } @@ -601,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } +static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + lock_ref_node(&fte->node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + unlock_ref_node(&fte->node); + + return err; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. + */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + err = mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct mlx5_flow_table *next_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ @@ -612,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table err = connect_prev_fts(dev, ft, prio); if (err) return err; + + next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; } if (MLX5_CAP_FLOWTABLE(dev, @@ -762,8 +835,10 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) if (!rule) return NULL; + INIT_LIST_HEAD(&rule->next_ft); rule->node.type = FS_TYPE_FLOW_DEST; - memcpy(&rule->dest_attr, dest, sizeof(*dest)); + if (dest) + memcpy(&rule->dest_attr, dest, sizeof(*dest)); return rule; } @@ -782,11 +857,17 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, return ERR_PTR(-ENOMEM); fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- added as first element after the head */ + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ tree_init_node(&rule->node, 1, del_rule); - list_add_tail(&rule->node.list, &fte->node.children); - fte->dests_size++; - if (fte->dests_size == 1) + if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) + fte->dests_size++; + if (fte->dests_size == 1 || !dest) err = mlx5_cmd_create_fte(get_dev(&ft->node), ft, fg->id, fte); else @@ -802,7 +883,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, free_rule: list_del(&rule->node.list); kfree(rule); - fte->dests_size--; + if (dest) + fte->dests_size--; return ERR_PTR(err); } @@ -903,6 +985,25 @@ out: return fg; } +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (rule->dest_attr.type == dest->type) { + if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dest->vport_num == rule->dest_attr.vport_num) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft == rule->dest_attr.ft) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + dest->tir_num == rule->dest_attr.tir_num)) + return rule; + } + } + return NULL; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -919,6 +1020,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { + rule = find_flow_rule(fte, dest); + if (rule) { + atomic_inc(&rule->node.refcount); + unlock_ref_node(&fte->node); + unlock_ref_node(&fg->node); + return rule; + } rule = add_rule_fte(fte, fg, dest); unlock_ref_node(&fte->node); if (IS_ERR(rule)) @@ -984,18 +1092,21 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, return rule; } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_rule * +_mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; + if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest) + return ERR_PTR(-EINVAL); + nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, @@ -1014,6 +1125,63 @@ unlock: unlock_ref_node(&ft->node); return rule; } + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_destination gen_dest; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_rule *rule = NULL; + u32 sw_action = action; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + if (dest) + return ERR_PTR(-EINVAL); + mutex_lock(&root->chain_lock); + next_ft = find_next_chained_ft(prio); + if (next_ft) { + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest.ft = next_ft; + dest = &gen_dest; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + mutex_unlock(&root->chain_lock); + return ERR_PTR(-EOPNOTSUPP); + } + } + + rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); + + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!IS_ERR_OR_NULL(rule) && + (list_empty(&rule->next_ft))) { + mutex_lock(&next_ft->lock); + list_add(&rule->next_ft, &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + mutex_unlock(&root->chain_lock); + } + return rule; +} EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) @@ -1077,6 +1245,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) return 0; next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + err = connect_prev_fts(dev, next_ft, prio); if (err) mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", @@ -1126,6 +1298,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: @@ -1351,6 +1524,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) } } +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +static int create_anchor_flow_table(struct mlx5_core_dev + *dev) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (!ns) + return -EINVAL; + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + static int init_root_ns(struct mlx5_core_dev *dev) { @@ -1363,6 +1555,9 @@ static int init_root_ns(struct mlx5_core_dev *dev) set_prio_attrs(dev->priv.root_ns); + if (create_anchor_flow_table(dev)) + goto cleanup; + return 0; cleanup: @@ -1392,6 +1587,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, root_ns = NULL; } +static void destroy_flow_tables(struct fs_prio *prio) +{ + struct mlx5_flow_table *iter; + struct mlx5_flow_table *tmp; + + fs_for_each_ft_safe(iter, tmp, prio) + mlx5_destroy_flow_table(iter); +} + static void cleanup_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; @@ -1420,6 +1624,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) list); fs_get_obj(obj_iter_prio2, iter_prio2); + destroy_flow_tables(obj_iter_prio2); if (tree_remove_node(iter_prio2)) { mlx5_core_warn(dev, "Priority %d wasn't destroyed, refcount > 1\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 00245fd7e4bc..f37a6248a27b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -68,6 +68,11 @@ struct fs_node { struct mlx5_flow_rule { struct fs_node node; struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. + */ + struct list_head next_ft; + u32 sw_action; }; /* Type of children is mlx5_flow_group */ @@ -82,6 +87,10 @@ struct mlx5_flow_table { unsigned int required_groups; unsigned int num_groups; } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; }; /* Type of children is mlx5_flow_rule */ @@ -142,6 +151,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_list_for_each_entry(pos, root) \ list_for_each_entry(pos, root, node.list) +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + #define fs_for_each_ns_or_ft_reverse(pos, prio) \ list_for_each_entry_reverse(pos, &(prio)->node.children, list) @@ -157,6 +169,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_ft(pos, prio) \ fs_list_for_each_entry(pos, &(prio)->node.children) +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1545a944c309..72a94e72ee25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -767,22 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -static int map_bf_area(struct mlx5_core_dev *dev) -{ - resource_size_t bf_start = pci_resource_start(dev->pdev, 0); - resource_size_t bf_len = pci_resource_len(dev->pdev, 0); - - dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len); - - return dev->priv.bf_mapping ? 0 : -ENOMEM; -} - -static void unmap_bf_area(struct mlx5_core_dev *dev) -{ - if (dev->priv.bf_mapping) - io_mapping_free(dev->priv.bf_mapping); -} - static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -1103,21 +1087,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_stop_eqs; } - if (map_bf_area(dev)) - dev_err(&pdev->dev, "Failed to map blue flame area\n"); - err = mlx5_irq_set_affinity_hints(dev); - if (err) { + if (err) dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_unmap_bf_area; - } MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); - mlx5_init_mr_table(dev); + mlx5_init_mkey_table(dev); err = mlx5_init_fs(dev); if (err) { @@ -1164,15 +1143,11 @@ err_sriov: err_reg_dev: mlx5_cleanup_fs(dev); err_fs: - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); - -err_unmap_bf_area: - unmap_bf_area(dev); - free_comp_eqs(dev); err_stop_eqs: @@ -1237,12 +1212,11 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #endif mlx5_cleanup_fs(dev); - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); - unmap_bf_area(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0336847ec9a1..0b0b226c789e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -99,6 +99,7 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); +u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 6fa22b51e460..77a7293921d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,25 +36,26 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -void mlx5_init_mr_table(struct mlx5_core_dev *dev) +void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; memset(table, 0, sizeof(*table)); rwlock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -83,34 +84,35 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } - mr->iova = be64_to_cpu(in->seg.start_addr); - mr->size = be64_to_cpu(in->seg.len); - mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey->iova = be64_to_cpu(in->seg.start_addr); + mkey->size = be64_to_cpu(in->seg.len); + mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mr->key); + be32_to_cpu(lout.mkey), key, mkey->key); - /* connect to MR tree */ + /* connect to mkey tree */ write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); write_unlock_irq(&table->lock); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", - mlx5_base_mkey(mr->key), err); - mlx5_core_destroy_mkey(dev, mr); + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_base_mkey(mkey->key), err); + mlx5_core_destroy_mkey(dev, mkey); } return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; - struct mlx5_core_mr *deleted_mr; + struct mlx5_core_mkey *deleted_mkey; unsigned long flags; int err; @@ -118,16 +120,16 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) memset(&out, 0, sizeof(out)); write_lock_irqsave(&table->lock, flags); - deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); write_unlock_irqrestore(&table->lock, flags); - if (!deleted_mr) { - mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", - mlx5_base_mkey(mr->key)); + if (!deleted_mkey) { + mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_base_mkey(mkey->key)); return -ENOENT; } in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; @@ -139,7 +141,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen) { struct mlx5_query_mkey_mbox_in in; @@ -149,7 +151,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, memset(out, 0, outlen); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); if (err) return err; @@ -161,7 +163,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, } EXPORT_SYMBOL(mlx5_core_query_mkey); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { struct mlx5_query_special_ctxs_mbox_in in; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773e93f3..ae378c575deb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/port.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -324,6 +325,29 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz) +{ + u32 *in; + int err; + + in = mlx5_vzalloc(sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); + int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; @@ -363,3 +387,223 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev, return 0; } EXPORT_SYMBOL_GPL(mlx5_query_port_pause); + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); + MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); + +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 0); + if (err) + return err; + + if (pfc_en_tx) + *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx); + + if (pfc_en_rx) + *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); + +int mlx5_max_tc(struct mlx5_core_dev *mdev) +{ + u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; + + return num_tc - 1; +} + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + int i; + + memset(in, 0, sizeof(in)); + for (i = 0; i < 8; i++) { + if (prio_tc[i] > mlx5_max_tc(mdev)) + return -EINVAL; + + MLX5_SET(qtct_reg, in, prio, i); + MLX5_SET(qtct_reg, in, tclass, prio_tc[i]); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 1); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); + +static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -ENOTSUPP; + + return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), + MLX5_REG_QETCR, 0, 1); +} + +static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -ENOTSUPP; + + memset(in, 0, sizeof(in)); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, + MLX5_REG_QETCR, 0, 0); +} + +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + int i; + + memset(in, 0, sizeof(in)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); + +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + int i; + + memset(in, 0, sizeof(in)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); + +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int i; + + memset(in, 0, sizeof(in)); + + MLX5_SET(qetc_reg, in, port_number, 1); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]); + + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units, + max_bw_units[i]); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value, + max_bw_value[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); + +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + int i; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]); + + max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_value); + max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_units); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); + +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]; + u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); + MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); + MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), + out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_wol); + +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]; + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), + out, sizeof(out)); + + if (!err) + *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_wol); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index eb05c845ece9..8ba080e441a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -226,7 +226,8 @@ int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) return 0; } -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, + bool map_wc) { phys_addr_t pfn; phys_addr_t uar_bar_start; @@ -240,20 +241,26 @@ int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) uar_bar_start = pci_resource_start(mdev->pdev, 0); pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) { - mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); - err = -ENOMEM; - goto err_free_uar; - } - if (mdev->priv.bf_mapping) - uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping, - uar->index << PAGE_SHIFT); + if (map_wc) { + uar->bf_map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->bf_map) { + mlx5_core_warn(mdev, "ioremap_wc() failed\n"); + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) + goto err_free_uar; + } + } else { + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) + goto err_free_uar; + } return 0; err_free_uar: + mlx5_core_warn(mdev, "ioremap() failed\n"); + err = -ENOMEM; mlx5_cmd_free_uar(mdev, uar->index); return err; @@ -262,8 +269,8 @@ EXPORT_SYMBOL(mlx5_alloc_map_uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) { - io_mapping_unmap(uar->bf_map); iounmap(uar->map); + iounmap(uar->bf_map); mlx5_cmd_free_uar(mdev, uar->index); } EXPORT_SYMBOL(mlx5_unmap_free_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c7398b95aecd..90ab09e375b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -850,3 +850,43 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = mlx5_vzalloc(in_sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, 0); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto free; + err = mlx5_cmd_status_to_err_v2(out); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c new file mode 100644 index 000000000000..9f10df25f3cd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "vxlan.h" + +void mlx5e_vxlan_init(struct mlx5e_priv *priv) +{ + struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + + spin_lock_init(&vxlan_db->lock); + INIT_RADIX_TREE(&vxlan_db->tree, GFP_ATOMIC); +} + +static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + struct mlx5_outbox_hdr *hdr; + int err; + + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]; + u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(add_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); + MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + hdr = (struct mlx5_outbox_hdr *)out; + return hdr->status ? -ENOMEM : 0; +} + +static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; + u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); +} + +struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) +{ + struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + struct mlx5e_vxlan *vxlan; + + spin_lock(&vxlan_db->lock); + vxlan = radix_tree_lookup(&vxlan_db->tree, port); + spin_unlock(&vxlan_db->lock); + + return vxlan; +} + +int mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port) +{ + struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + struct mlx5e_vxlan *vxlan; + int err; + + err = mlx5e_vxlan_core_add_port_cmd(priv->mdev, port); + if (err) + return err; + + vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL); + if (!vxlan) { + err = -ENOMEM; + goto err_delete_port; + } + + vxlan->udp_port = port; + + spin_lock_irq(&vxlan_db->lock); + err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); + spin_unlock_irq(&vxlan_db->lock); + if (err) + goto err_free; + + return 0; + +err_free: + kfree(vxlan); +err_delete_port: + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + return err; +} + +static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +{ + struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + struct mlx5e_vxlan *vxlan; + + spin_lock_irq(&vxlan_db->lock); + vxlan = radix_tree_delete(&vxlan_db->tree, port); + spin_unlock_irq(&vxlan_db->lock); + + if (!vxlan) + return; + + mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); + + kfree(vxlan); +} + +void mlx5e_vxlan_del_port(struct mlx5e_priv *priv, u16 port) +{ + if (!mlx5e_vxlan_lookup_port(priv, port)) + return; + + __mlx5e_vxlan_core_del_port(priv, port); +} + +void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + struct mlx5e_vxlan *vxlan; + unsigned int port = 0; + + spin_lock_irq(&vxlan_db->lock); + while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { + port = vxlan->udp_port; + spin_unlock_irq(&vxlan_db->lock); + __mlx5e_vxlan_core_del_port(priv, (u16)port); + spin_lock_irq(&vxlan_db->lock); + } + spin_unlock_irq(&vxlan_db->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h new file mode 100644 index 000000000000..a01685056ab1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_VXLAN_H__ +#define __MLX5_VXLAN_H__ + +#include <linux/mlx5/driver.h> +#include "en.h" + +struct mlx5e_vxlan { + u16 udp_port; +}; + +static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && + mlx5_core_is_pf(mdev)); +} + +void mlx5e_vxlan_init(struct mlx5e_priv *priv); +int mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port); +void mlx5e_vxlan_del_port(struct mlx5e_priv *priv, u16 port); +struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port); +void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); + +#endif /* __MLX5_VXLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index ce26adcb4988..2ad7f67854d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -4,6 +4,7 @@ config MLXSW_CORE tristate "Mellanox Technologies Switch ASICs support" + depends on MAY_USE_DEVLINK ---help--- This driver supports Mellanox Technologies Switch ASICs family. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 22379eb8e924..f69f6280519f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -56,6 +56,7 @@ #include <linux/rcupdate.h> #include <linux/slab.h> #include <asm/byteorder.h> +#include <net/devlink.h> #include "core.h" #include "item.h" @@ -784,6 +785,38 @@ static void mlxsw_core_debugfs_fini(struct mlxsw_core *mlxsw_core) debugfs_remove_recursive(mlxsw_core->dbg_dir); } +static int mlxsw_devlink_port_split(struct devlink *devlink, + unsigned int port_index, + unsigned int count) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + if (port_index >= MLXSW_PORT_MAX_PORTS) + return -EINVAL; + if (!mlxsw_core->driver->port_split) + return -EOPNOTSUPP; + return mlxsw_core->driver->port_split(mlxsw_core->driver_priv, + port_index, count); +} + +static int mlxsw_devlink_port_unsplit(struct devlink *devlink, + unsigned int port_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + if (port_index >= MLXSW_PORT_MAX_PORTS) + return -EINVAL; + if (!mlxsw_core->driver->port_unsplit) + return -EOPNOTSUPP; + return mlxsw_core->driver->port_unsplit(mlxsw_core->driver_priv, + port_index); +} + +static const struct devlink_ops mlxsw_devlink_ops = { + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, +}; + int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv) @@ -791,6 +824,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; struct mlxsw_driver *mlxsw_driver; + struct devlink *devlink; size_t alloc_size; int err; @@ -798,12 +832,13 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!mlxsw_driver) return -EINVAL; alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; - mlxsw_core = kzalloc(alloc_size, GFP_KERNEL); - if (!mlxsw_core) { + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size); + if (!devlink) { err = -ENOMEM; - goto err_core_alloc; + goto err_devlink_alloc; } + mlxsw_core = devlink_priv(devlink); INIT_LIST_HEAD(&mlxsw_core->rx_listener_list); INIT_LIST_HEAD(&mlxsw_core->event_listener_list); mlxsw_core->driver = mlxsw_driver; @@ -841,6 +876,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_hwmon_init; + err = devlink_register(devlink, mlxsw_bus_info->dev); + if (err) + goto err_devlink_register; + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, mlxsw_bus_info); if (err) @@ -855,6 +894,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core->driver_priv); err_driver_init: + devlink_unregister(devlink); +err_devlink_register: err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: @@ -864,8 +905,8 @@ err_bus_init: err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: - kfree(mlxsw_core); -err_core_alloc: + devlink_free(devlink); +err_devlink_alloc: mlxsw_core_driver_put(device_kind); return err; } @@ -874,14 +915,16 @@ EXPORT_SYMBOL(mlxsw_core_bus_device_register); void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) { const char *device_kind = mlxsw_core->bus_info->device_kind; + struct devlink *devlink = priv_to_devlink(mlxsw_core); mlxsw_core_debugfs_fini(mlxsw_core); mlxsw_core->driver->fini(mlxsw_core->driver_priv); + devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); - kfree(mlxsw_core); + devlink_free(devlink); mlxsw_core_driver_put(device_kind); } EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index a01723600f0a..c73d1c0792a6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -186,6 +186,8 @@ struct mlxsw_driver { int (*init)(void *driver_priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info); void (*fini)(void *driver_priv); + int (*port_split)(void *driver_priv, u8 local_port, unsigned int count); + int (*port_unsplit)(void *driver_priv, u8 local_port); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); u8 txhdr_len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c071077aafbd..7f4173c8eda3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -215,7 +215,7 @@ mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q) { int index = q->producer_counter & (q->count - 1); - if ((q->producer_counter - q->consumer_counter) == q->count) + if ((u16) (q->producer_counter - q->consumer_counter) == q->count) return NULL; return mlxsw_pci_queue_elem_info_get(q, index); } @@ -1681,11 +1681,18 @@ static const struct mlxsw_bus mlxsw_pci_bus = { static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci) { + unsigned long end; + mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT); - /* Current firware does not let us know when the reset is done. - * So we just wait here for constant time and hope for the best. - */ - msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + wmb(); /* reset needs to be written before we read control register */ + end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + do { + u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); + + if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) + break; + cond_resched(); + } while (time_before(jiffies, end)); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 912106054ff2..d942a3e6fa41 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -61,6 +61,9 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_FW_READY 0xA1844 +#define MLXSW_PCI_FW_READY_MASK 0xFF +#define MLXSW_PCI_FW_READY_MAGIC 0x5E #define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 #define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200 diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index 726f5435b32f..f33b997f2b61 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -49,7 +49,7 @@ #define MLXSW_PORT_MID 0xd000 #define MLXSW_PORT_MAX_PHY_PORTS 0x40 -#define MLXSW_PORT_MAX_PORTS MLXSW_PORT_MAX_PHY_PORTS +#define MLXSW_PORT_MAX_PORTS (MLXSW_PORT_MAX_PHY_PORTS + 1) #define MLXSW_PORT_DEVID_BITS_OFFSET 10 #define MLXSW_PORT_PHY_BITS_OFFSET 4 @@ -59,6 +59,8 @@ #define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) +#define MLXSW_PORT_MODULE_MAX_WIDTH 4 + enum mlxsw_port_admin_status { MLXSW_PORT_ADMIN_STATUS_UP = 1, MLXSW_PORT_ADMIN_STATUS_DOWN = 2, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0c5237264e3e..ffe4c0305733 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -873,6 +873,62 @@ static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port, } } +/* SPAFT - Switch Port Acceptable Frame Types + * ------------------------------------------ + * The Switch Port Acceptable Frame Types register configures the frame + * admittance of the port. + */ +#define MLXSW_REG_SPAFT_ID 0x2010 +#define MLXSW_REG_SPAFT_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_spaft = { + .id = MLXSW_REG_SPAFT_ID, + .len = MLXSW_REG_SPAFT_LEN, +}; + +/* reg_spaft_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported (all tag types are allowed). + */ +MLXSW_ITEM32(reg, spaft, local_port, 0x00, 16, 8); + +/* reg_spaft_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8); + +/* reg_spaft_allow_untagged + * When set, untagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1); + +/* reg_spaft_allow_prio_tagged + * When set, priority tagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1); + +/* reg_spaft_allow_tagged + * When set, tagged frames on the ingress are allowed (default). + * Access: RW + */ +MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1); + +static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port, + bool allow_untagged) +{ + MLXSW_REG_ZERO(spaft, payload); + mlxsw_reg_spaft_local_port_set(payload, local_port); + mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged); + mlxsw_reg_spaft_allow_prio_tagged_set(payload, true); + mlxsw_reg_spaft_allow_tagged_set(payload, true); +} + /* SFGC - Switch Flooding Group Configuration * ------------------------------------------ * The following register controls the association of flooding tables and MIDs @@ -1044,6 +1100,92 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SFDF - Switch Filtering DB Flush + * -------------------------------- + * The switch filtering DB flush register is used to flush the FDB. + * Note that FDB notifications are flushed as well. + */ +#define MLXSW_REG_SFDF_ID 0x2013 +#define MLXSW_REG_SFDF_LEN 0x14 + +static const struct mlxsw_reg_info mlxsw_reg_sfdf = { + .id = MLXSW_REG_SFDF_ID, + .len = MLXSW_REG_SFDF_LEN, +}; + +/* reg_sfdf_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8); + +enum mlxsw_reg_sfdf_flush_type { + MLXSW_REG_SFDF_FLUSH_PER_SWID, + MLXSW_REG_SFDF_FLUSH_PER_FID, + MLXSW_REG_SFDF_FLUSH_PER_PORT, + MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID, + MLXSW_REG_SFDF_FLUSH_PER_LAG, + MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID, +}; + +/* reg_sfdf_flush_type + * Flush type. + * 0 - All SWID dynamic entries are flushed. + * 1 - All FID dynamic entries are flushed. + * 2 - All dynamic entries pointing to port are flushed. + * 3 - All FID dynamic entries pointing to port are flushed. + * 4 - All dynamic entries pointing to LAG are flushed. + * 5 - All FID dynamic entries pointing to LAG are flushed. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4); + +/* reg_sfdf_flush_static + * Static. + * 0 - Flush only dynamic entries. + * 1 - Flush both dynamic and static entries. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1); + +static inline void mlxsw_reg_sfdf_pack(char *payload, + enum mlxsw_reg_sfdf_flush_type type) +{ + MLXSW_REG_ZERO(sfdf, payload); + mlxsw_reg_sfdf_flush_type_set(payload, type); + mlxsw_reg_sfdf_flush_static_set(payload, true); +} + +/* reg_sfdf_fid + * FID to flush. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16); + +/* reg_sfdf_system_port + * Port to flush. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16); + +/* reg_sfdf_port_fid_system_port + * Port to flush, pointed to by FID. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16); + +/* reg_sfdf_lag_id + * LAG ID to flush. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10); + +/* reg_sfdf_lag_fid_lag_id + * LAG ID to flush, pointed to by FID. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10); + /* SLDR - Switch LAG Descriptor Register * ----------------------------------------- * The switch LAG descriptor register is populated by LAG descriptors. @@ -1701,20 +1843,20 @@ MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8); * Module number. * Access: RW */ -MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false); +MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false); /* reg_pmlp_tx_lane * Tx Lane. When rxtx field is cleared, this field is used for Rx as well. * Access: RW */ -MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false); +MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false); /* reg_pmlp_rx_lane * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is * equal to Tx lane. * Access: RW */ -MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false); +MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false); static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port) { @@ -3117,10 +3259,14 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SPVID"; case MLXSW_REG_SPVM_ID: return "SPVM"; + case MLXSW_REG_SPAFT_ID: + return "SPAFT"; case MLXSW_REG_SFGC_ID: return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SFDF_ID: + return "SFDF"; case MLXSW_REG_SLDR_ID: return "SLDR"; case MLXSW_REG_SLCR_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ce6845d534a8..4afbc3e9e381 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -49,6 +49,7 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/list.h> +#include <net/devlink.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -304,21 +305,47 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int mlxsw_sp_port_module_check(struct mlxsw_sp_port *mlxsw_sp_port, - bool *p_usable) +static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; - mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); if (err) return err; - *p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false; + *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); return 0; } +static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 module, u8 width, u8 lane) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int i; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_width_set(pmlp_pl, width); + for (i = 0; i < width; i++) { + mlxsw_reg_pmlp_module_set(pmlp_pl, i, module); + mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, lane + i); /* Rx & Tx */ + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); +} + +static int mlxsw_sp_port_module_unmap(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_width_set(pmlp_pl, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); +} + static int mlxsw_sp_port_open(struct net_device *dev) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -1273,6 +1300,18 @@ static u32 mlxsw_sp_to_ptys_speed(u32 speed) return ptys_proto; } +static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp_port_link_mode[i].speed <= upper_speed) + ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + } + return ptys_proto; +} + static int mlxsw_sp_port_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -1349,11 +1388,27 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .set_settings = mlxsw_sp_port_set_settings, }; -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) +static int +mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 upper_speed = MLXSW_SP_PORT_BASE_SPEED * width; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_admin; + + eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed); + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, + eth_proto_admin); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); +} + +static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; + struct devlink_port *devlink_port; struct net_device *dev; - bool usable; size_t bytes; int err; @@ -1364,6 +1419,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; + mlxsw_sp_port->split = split; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -1404,17 +1460,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) */ dev->hard_header_len += MLXSW_TXHDR_LEN; - err = mlxsw_sp_port_module_check(mlxsw_sp_port, &usable); + devlink_port = &mlxsw_sp_port->devlink_port; + if (mlxsw_sp_port->split) + devlink_port_split_set(devlink_port, module); + err = devlink_port_register(devlink, devlink_port, local_port); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to check module\n", + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register devlink port\n", mlxsw_sp_port->local_port); - goto err_port_module_check; - } - - if (!usable) { - dev_dbg(mlxsw_sp->bus_info->dev, "Port %d: Not usable, skipping initialization\n", - mlxsw_sp_port->local_port); - goto port_not_usable; + goto err_devlink_port_register; } err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); @@ -1431,6 +1484,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) goto err_port_swid_set; } + err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n", + mlxsw_sp_port->local_port); + goto err_port_speed_by_width_set; + } + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", @@ -1457,6 +1517,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) goto err_register_netdev; } + devlink_port_type_eth_set(devlink_port, dev); + err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); if (err) goto err_port_vlan_init; @@ -1470,10 +1532,11 @@ err_register_netdev: err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: +err_port_speed_by_width_set: err_port_swid_set: err_port_system_port_mapping_set: -port_not_usable: -err_port_module_check: + devlink_port_unregister(&mlxsw_sp_port->devlink_port); +err_devlink_port_register: err_dev_addr_init: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: @@ -1485,6 +1548,28 @@ err_port_active_vlans_alloc: return err; } +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width, u8 lane) +{ + int err; + + err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, + lane); + if (err) + return err; + + err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module, + width); + if (err) + goto err_port_create; + + return 0; + +err_port_create: + mlxsw_sp_port_module_unmap(mlxsw_sp, local_port); + return err; +} + static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; @@ -1505,12 +1590,19 @@ static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + struct devlink_port *devlink_port; if (!mlxsw_sp_port) return; + mlxsw_sp->ports[local_port] = NULL; + devlink_port = &mlxsw_sp_port->devlink_port; + devlink_port_type_clear(devlink_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + devlink_port_unregister(devlink_port); mlxsw_sp_port_vports_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); + mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); @@ -1529,6 +1621,7 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { size_t alloc_size; + u8 module, width; int i; int err; @@ -1538,19 +1631,158 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { - err = mlxsw_sp_port_create(mlxsw_sp, i); + err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, + &width); + if (err) + goto err_port_module_info_get; + if (!width) + continue; + mlxsw_sp->port_to_module[i] = module; + err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); if (err) goto err_port_create; } return 0; err_port_create: +err_port_module_info_get: for (i--; i >= 1; i--) mlxsw_sp_port_remove(mlxsw_sp, i); kfree(mlxsw_sp->ports); return err; } +static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) +{ + u8 offset = (local_port - 1) % MLXSW_SP_PORTS_PER_CLUSTER_MAX; + + return local_port - offset; +} + +static int mlxsw_sp_port_split(void *priv, u8 local_port, unsigned int count) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port; + u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + u8 module, cur_width, base_port; + int i; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) { + dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", + local_port); + return -EINVAL; + } + + if (count != 2 && count != 4) { + netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, + &cur_width); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); + return err; + } + + if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { + netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); + return -EINVAL; + } + + /* Make sure we have enough slave (even) ports for the split. */ + if (count == 2) { + base_port = local_port; + if (mlxsw_sp->ports[base_port + 1]) { + netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n"); + return -EINVAL; + } + } else { + base_port = mlxsw_sp_cluster_base_port_get(local_port); + if (mlxsw_sp->ports[base_port + 1] || + mlxsw_sp->ports[base_port + 3]) { + netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n"); + return -EINVAL; + } + } + + for (i = 0; i < count; i++) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, + module, width, i * width); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n"); + goto err_port_create; + } + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + for (i = 0; i < count / 2; i++) { + module = mlxsw_sp->port_to_module[base_port + i * 2]; + mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, + module, MLXSW_PORT_MODULE_MAX_WIDTH, 0); + } + return err; +} + +static int mlxsw_sp_port_unsplit(void *priv, u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port; + u8 module, cur_width, base_port; + unsigned int count; + int i; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) { + dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", + local_port); + return -EINVAL; + } + + if (!mlxsw_sp_port->split) { + netdev_err(mlxsw_sp_port->dev, "Port wasn't split\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, + &cur_width); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); + return err; + } + count = cur_width == 1 ? 4 : 2; + + base_port = mlxsw_sp_cluster_base_port_get(local_port); + + /* Determine which ports to remove. */ + if (count == 2 && local_port >= base_port + 2) + base_port = base_port + 2; + + for (i = 0; i < count; i++) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + + for (i = 0; i < count / 2; i++) { + module = mlxsw_sp->port_to_module[base_port + i * 2]; + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, + module, MLXSW_PORT_MODULE_MAX_WIDTH, + 0); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n"); + } + + return 0; +} + static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, char *pude_pl, void *priv) { @@ -1974,11 +2206,122 @@ static struct mlxsw_driver mlxsw_sp_driver = { .priv_size = sizeof(struct mlxsw_sp), .init = mlxsw_sp_init, .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, .txhdr_construct = mlxsw_sp_txhdr_construct, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp_config_profile, }; +static int +mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT); + mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static int +mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); + mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, + mlxsw_sp_port->local_port); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static int +mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG); + mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static int +mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); + mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +static int +__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, last_err = 0; + u16 vid; + + for (vid = 1; vid < VLAN_N_VID - 1; vid++) { + err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid); + if (err) + last_err = err; + } + + return last_err; +} + +static int +__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, last_err = 0; + u16 vid; + + for (vid = 1; vid < VLAN_N_VID - 1; vid++) { + err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid); + if (err) + last_err = err; + } + + return last_err; +} + +static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port) +{ + if (!list_empty(&mlxsw_sp_port->vports_list)) + if (mlxsw_sp_port->lagged) + return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port); + else + return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port); + else + if (mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port); + else + return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port); +} + +static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport); + u16 fid = mlxsw_sp_vfid_to_fid(vfid); + + if (mlxsw_sp_vport->lagged) + return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport, + fid); + else + return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid); +} + static bool mlxsw_sp_port_dev_check(const struct net_device *dev) { return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; @@ -2006,10 +2349,16 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + bool flush_fdb) { struct net_device *dev = mlxsw_sp_port->dev; + if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port)) + netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); + + mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + mlxsw_sp_port->learning = 0; mlxsw_sp_port->learning_sync = 0; mlxsw_sp_port->uc_flood = 0; @@ -2200,10 +2549,15 @@ err_col_port_enable: return err; } +static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev, + bool flush_fdb); + static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *mlxsw_sp_vport; struct mlxsw_sp_upper *lag; u16 lag_id = mlxsw_sp_port->lag_id; int err; @@ -2220,7 +2574,30 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; + /* In case we leave a LAG device that has bridges built on top, + * then their teardown sequence is never issued and we need to + * invoke the necessary cleanup routines ourselves. + */ + list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, + vport.list) { + struct net_device *br_dev; + + if (!mlxsw_sp_vport->bridged) + continue; + + br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false); + } + + if (mlxsw_sp_port->bridged) { + mlxsw_sp_port_active_vlans_del(mlxsw_sp_port); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false); + mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL); + } + if (lag->ref_count == 1) { + if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port)) + netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); if (err) return err; @@ -2272,9 +2649,6 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); } -static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev); - static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *vlan_dev) { @@ -2312,7 +2686,7 @@ static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *br_dev; br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev); + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true); } mlxsw_sp_vport->dev = mlxsw_sp_port->dev; @@ -2374,7 +2748,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, } mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev); } else { - err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port); + err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port, + true); mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); if (err) { netdev_err(dev, "Failed to leave bridge\n"); @@ -2541,7 +2916,8 @@ static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev) + struct net_device *br_dev, + bool flush_fdb) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); @@ -2604,6 +2980,16 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, goto err_vport_flood_set; } + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, + MLXSW_REG_SPMS_STATE_FORWARDING); + if (err) { + netdev_err(dev, "Failed to set STP state\n"); + goto err_port_stp_state_set; + } + + if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport)) + netdev_err(dev, "Failed to flush FDB\n"); + /* Switch between the vFIDs and destroy the old one if needed. */ new_vfid->nr_vports++; mlxsw_sp_vport->vport.vfid = new_vfid; @@ -2618,6 +3004,7 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, return 0; +err_port_stp_state_set: err_vport_flood_set: err_port_vid_learning_set: err_port_vid_to_fid_validate: @@ -2777,7 +3164,7 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, if (!mlxsw_sp_vport) return NOTIFY_DONE; err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, - upper_dev); + upper_dev, true); if (err) { netdev_err(dev, "Failed to leave bridge\n"); return NOTIFY_BAD; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a23dc610d259..4b8abaf06321 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -43,6 +43,7 @@ #include <linux/if_vlan.h> #include <linux/list.h> #include <net/switchdev.h> +#include <net/devlink.h> #include "port.h" #include "core.h" @@ -57,6 +58,10 @@ #define MLXSW_SP_MID_MAX 7000 +#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 + +#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ + struct mlxsw_sp_port; struct mlxsw_sp_upper { @@ -118,11 +123,13 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 unsigned int interval; /* ms */ } fdb_notify; +#define MLXSW_SP_MIN_AGEING_TIME 10 +#define MLXSW_SP_MAX_AGEING_TIME 1000000 #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct mutex fdb_lock; /* Make sure FDB sessions are atomic. */ struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; + u8 port_to_module[MLXSW_PORT_MAX_PORTS]; }; static inline struct mlxsw_sp_upper * @@ -150,7 +157,8 @@ struct mlxsw_sp_port { learning_sync:1, uc_flood:1, bridged:1, - lagged:1; + lagged:1, + split:1; u16 pvid; u16 lag_id; struct { @@ -163,6 +171,7 @@ struct mlxsw_sp_port { unsigned long *untagged_vlans; /* VLAN interfaces */ struct list_head vports_list; + struct devlink_port devlink_port; }; static inline struct mlxsw_sp_port * @@ -254,5 +263,7 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, bool set, bool only_uc); +void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 45479ef5bcf4..e1c74efff51a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -45,6 +45,7 @@ #include <linux/if_bridge.h> #include <linux/workqueue.h> #include <linux/jiffies.h> +#include <linux/rtnetlink.h> #include <net/switchdev.h> #include "spectrum.h" @@ -124,14 +125,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, int err; switch (state) { - case BR_STATE_DISABLED: /* fall-through */ case BR_STATE_FORWARDING: spms_state = MLXSW_REG_SPMS_STATE_FORWARDING; break; - case BR_STATE_LISTENING: /* fall-through */ case BR_STATE_LEARNING: spms_state = MLXSW_REG_SPMS_STATE_LEARNING; break; + case BR_STATE_LISTENING: /* fall-through */ + case BR_STATE_DISABLED: /* fall-through */ case BR_STATE_BLOCKING: spms_state = MLXSW_REG_SPMS_STATE_DISCARDING; break; @@ -310,8 +311,13 @@ static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; - if (switchdev_trans_ph_prepare(trans)) - return 0; + if (switchdev_trans_ph_prepare(trans)) { + if (ageing_time < MLXSW_SP_MIN_AGEING_TIME || + ageing_time > MLXSW_SP_MAX_AGEING_TIME) + return -ERANGE; + else + return 0; + } return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); } @@ -369,7 +375,8 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char spvid_pl[MLXSW_REG_SPVID_LEN]; @@ -378,6 +385,53 @@ static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); } +static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool allow) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spaft_pl[MLXSW_REG_SPAFT_LEN]; + + mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl); +} + +int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + if (!vid) { + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false); + if (err) { + netdev_err(dev, "Failed to disallow untagged traffic\n"); + return err; + } + } else { + err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + if (err) { + netdev_err(dev, "Failed to set PVID\n"); + return err; + } + + /* Only allow if not already allowed. */ + if (!mlxsw_sp_port->pvid) { + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, + true); + if (err) { + netdev_err(dev, "Failed to allow untagged traffic\n"); + goto err_port_allow_untagged_set; + } + } + } + + mlxsw_sp_port->pvid = vid; + return 0; + +err_port_allow_untagged_set: + __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid); + return err; +} + static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) { char sfmr_pl[MLXSW_REG_SFMR_LEN]; @@ -539,7 +593,12 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to add PVID %d\n", vid_begin); goto err_port_pvid_set; } - mlxsw_sp_port->pvid = vid_begin; + } else if (!flag_pvid && old_pvid >= vid_begin && old_pvid <= vid_end) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); + if (err) { + netdev_err(dev, "Unable to del PVID\n"); + goto err_port_pvid_set; + } } /* Changing activity bits only if HW operation succeded */ @@ -891,20 +950,18 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return err; } + if (init) + goto out; + pvid = mlxsw_sp_port->pvid; - if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) { - /* Default VLAN is always 1 */ - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + if (pvid >= vid_begin && pvid <= vid_end) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); if (err) { netdev_err(dev, "Unable to del PVID %d\n", pvid); return err; } - mlxsw_sp_port->pvid = 1; } - if (init) - goto out; - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, false); if (err) { @@ -936,6 +993,14 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, vlan->vid_begin, vlan->vid_end, false); } +void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) +{ + u16 vid; + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false); +} + static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) @@ -1040,10 +1105,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) + switchdev_obj_dump_cb_t *cb, + struct net_device *orig_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u16 vport_vid = 0, vport_fid = 0; + struct mlxsw_sp_port *tmp; + u16 vport_fid = 0; char *sfd_pl; char mac[ETH_ALEN]; u16 fid; @@ -1058,13 +1125,11 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (!sfd_pl) return -ENOMEM; - mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock); if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { u16 tmp; tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); vport_fid = mlxsw_sp_vfid_to_fid(tmp); - vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); } mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); @@ -1088,12 +1153,13 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid, &local_port); if (local_port == mlxsw_sp_port->local_port) { - if (vport_fid && vport_fid != fid) - continue; - else if (vport_fid) - fdb->vid = vport_vid; - else + if (vport_fid && vport_fid == fid) + fdb->vid = 0; + else if (!vport_fid && + !mlxsw_sp_fid_is_vfid(fid)) fdb->vid = fid; + else + continue; ether_addr_copy(fdb->addr, mac); fdb->ndm_state = NUD_REACHABLE; err = cb(&fdb->obj); @@ -1104,14 +1170,22 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, mac, &fid, &lag_id); - if (mlxsw_sp_port == - mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { - if (vport_fid && vport_fid != fid) + tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (tmp && tmp->local_port == + mlxsw_sp_port->local_port) { + /* LAG records can only point to LAG + * devices or VLAN devices on top. + */ + if (!netif_is_lag_master(orig_dev) && + !is_vlan_dev(orig_dev)) continue; - else if (vport_fid) - fdb->vid = vport_vid; - else + if (vport_fid && vport_fid == fid) + fdb->vid = 0; + else if (!vport_fid && + !mlxsw_sp_fid_is_vfid(fid)) fdb->vid = fid; + else + continue; ether_addr_copy(fdb->addr, mac); fdb->ndm_state = NUD_REACHABLE; err = cb(&fdb->obj); @@ -1124,7 +1198,6 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); out: - mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock); kfree(sfd_pl); return stored_err ? stored_err : err; } @@ -1176,7 +1249,8 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev, break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_FDB(obj), cb); + SWITCHDEV_OBJ_PORT_FDB(obj), cb, + obj->orig_dev); break; default: err = -EOPNOTSUPP; @@ -1194,14 +1268,14 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; -static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, - bool adding, char *mac, u16 vid, +static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding, + char *mac, u16 vid, struct net_device *dev) { struct switchdev_notifier_fdb_info info; unsigned long notifier_type; - if (learning && learning_sync) { + if (learning_sync) { info.addr = mac; info.vid = vid; notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; @@ -1237,7 +1311,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); goto just_remove; } - vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + vid = 0; /* Override the physical port with the vPort. */ mlxsw_sp_port = mlxsw_sp_vport; } else { @@ -1257,8 +1331,7 @@ do_fdb_op: if (!do_notification) return; - mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, - mlxsw_sp_port->learning_sync, + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac, vid, mlxsw_sp_port->dev); return; @@ -1273,6 +1346,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, bool adding) { struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *dev; char mac[ETH_ALEN]; u16 lag_vid = 0; u16 lag_id; @@ -1298,11 +1372,13 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, goto just_remove; } - vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - lag_vid = vid; + lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + dev = mlxsw_sp_vport->dev; + vid = 0; /* Override the physical port with the vPort. */ mlxsw_sp_port = mlxsw_sp_vport; } else { + dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev; vid = fid; } @@ -1319,10 +1395,8 @@ do_fdb_op: if (!do_notification) return; - mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, - mlxsw_sp_port->learning_sync, - adding, mac, vid, - mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac, + vid, dev); return; just_remove: @@ -1374,7 +1448,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); - mutex_lock(&mlxsw_sp->fdb_lock); + rtnl_lock(); do { mlxsw_reg_sfn_pack(sfn_pl); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); @@ -1387,7 +1461,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); } while (num_rec); - mutex_unlock(&mlxsw_sp->fdb_lock); + rtnl_unlock(); kfree(sfn_pl); mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); @@ -1402,7 +1476,6 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n"); return err; } - mutex_init(&mlxsw_sp->fdb_lock); INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work); mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index d85960cfb694..7a60a26759b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -43,6 +43,7 @@ #include <linux/device.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> +#include <net/devlink.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -78,6 +79,7 @@ struct mlxsw_sx_port { struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sx *mlxsw_sx; u8 local_port; + struct devlink_port devlink_port; }; /* tx_hdr_version @@ -953,7 +955,9 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port, static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) { + struct devlink *devlink = priv_to_devlink(mlxsw_sx->core); struct mlxsw_sx_port *mlxsw_sx_port; + struct devlink_port *devlink_port; struct net_device *dev; bool usable; int err; @@ -1007,6 +1011,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto port_not_usable; } + devlink_port = &mlxsw_sx_port->devlink_port; + err = devlink_port_register(devlink, devlink_port, local_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register devlink port\n", + mlxsw_sx_port->local_port); + goto err_devlink_port_register; + } + err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1064,6 +1076,8 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto err_register_netdev; } + devlink_port_type_eth_set(devlink_port, dev); + mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; @@ -1075,6 +1089,8 @@ err_port_mtu_set: err_port_speed_set: err_port_swid_set: err_port_system_port_mapping_set: + devlink_port_unregister(&mlxsw_sx_port->devlink_port); +err_devlink_port_register: port_not_usable: err_port_module_check: err_dev_addr_get: @@ -1087,11 +1103,15 @@ err_alloc_stats: static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) { struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; + struct devlink_port *devlink_port; if (!mlxsw_sx_port) return; + devlink_port = &mlxsw_sx_port->devlink_port; + devlink_port_type_clear(devlink_port); unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); + devlink_port_unregister(devlink_port); free_percpu(mlxsw_sx_port->pcpu_stats); free_netdev(mlxsw_sx_port->dev); } |