diff options
author | David S. Miller <davem@davemloft.net> | 2022-05-11 14:12:27 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-05-11 14:12:27 +0300 |
commit | dc3a2001f61611347c057fea422c382b9ce3cfcb (patch) | |
tree | e9eae7cb5bdb0649b74896e8213c1a4884bba33e | |
parent | 53a332f222c015cb82349fd4f6b58cb14f574e8d (diff) | |
parent | 7f46a0b7327ae261f9981888708dbca22c283900 (diff) | |
download | linux-dc3a2001f61611347c057fea422c382b9ce3cfcb.tar.xz |
Merge tag 'mlx5-updates-2022-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:
====================
mlx5-updates-2022-05-09
1) Gavin Li, adds exit route from waiting for FW init on device boot and
increases FW init timeout on health recovery flow
2) Support 4 ports HCAs LAG mode
Mark Bloch Says:
================
This series adds to mlx5 drivers support for 4 ports HCAs.
Starting with ConnectX-7 HCAs with 4 ports are possible.
As most driver parts aren't affected by such configuration most driver
code is unchanged.
Specially the only affected areas are:
- Lag
- Devcom
- Merged E-Switch
- Single FDB E-Switch
Lag was chosen to be converted first. Creating hardware LAG when all 4
ports are added to the same bond device.
Devom, merge E-Switch and single FDB E-Switch, are marked as supporting
only 2 ports HCAs and future patches will add support for 4 ports HCAs.
In order to activate the hardware lag a user can execute the:
ip link add bond0 type bond
ip link set bond0 type bond miimon 100 mode 2
ip link set eth2 master bond0
ip link set eth3 master bond0
ip link set eth4 master bond0
ip link set eth5 master bond0
Where eth2, eth3, eth4 and eth5 are the PFs of the same HCA.
================
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
22 files changed, 720 insertions, 302 deletions
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 3ad8f637c589..b804f2dd5628 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -100,7 +100,7 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, port_type) == MLX5_CAP_PORT_TYPE_IB) num_qps = pd->device->attrs.max_pkeys; else if (dev->lag_active) - num_qps = MLX5_MAX_PORTS; + num_qps = dev->lag_ports; } gsi = &mqp->gsi; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61aa196d6484..61a3b767262f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2991,6 +2991,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_ports = mlx5_lag_get_num_ports(mdev); dev->lag_active = true; return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4f04bb55c4c6..8b3c83c0b70a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1131,6 +1131,7 @@ struct mlx5_ib_dev { struct xarray sig_mrs; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u16 pkey_table_len; + u8 lag_ports; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3f467557d34e..fb8669c02546 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3907,7 +3907,7 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; return (unsigned int)atomic_add_return(1, tx_port_affinity) % - MLX5_MAX_PORTS + 1; + (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1; } static bool qp_supports_affinity(struct mlx5_ib_qp *qp) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 81620c25c77e..7895ed7cc285 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ - fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \ + fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o lib/tout.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ba6dad97e308..11f7c03ae81b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -555,12 +555,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) PCI_SLOT(dev->pdev->devfn)); } -static int next_phys_dev(struct device *dev, const void *data) +static int _next_phys_dev(struct mlx5_core_dev *mdev, + const struct mlx5_core_dev *curr) { - struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); - struct mlx5_core_dev *mdev = madev->mdev; - const struct mlx5_core_dev *curr = data; - if (!mlx5_core_is_pf(mdev)) return 0; @@ -574,8 +571,30 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* Must be called with intf_mutex held */ -struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +static int next_phys_dev(struct device *dev, const void *data) +{ + struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_core_dev *mdev = madev->mdev; + + return _next_phys_dev(mdev, data); +} + +static int next_phys_dev_lag(struct device *dev, const void *data) +{ + struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_core_dev *mdev = madev->mdev; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager) || + !MLX5_CAP_GEN(mdev, lag_master) || + (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(mdev, num_lag_ports) <= 1)) + return 0; + + return _next_phys_dev(mdev, data); +} + +static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, + int (*match)(struct device *dev, const void *data)) { struct auxiliary_device *adev; struct mlx5_adev *madev; @@ -583,7 +602,7 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; - adev = auxiliary_find_device(NULL, dev, &next_phys_dev); + adev = auxiliary_find_device(NULL, dev, match); if (!adev) return NULL; @@ -592,6 +611,20 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return madev->mdev; } +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev_lag); +} + void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e8789e6d7e7b..f85166e587f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -178,13 +178,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); default: /* Unsupported action should not get to this function */ WARN_ON(1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 25f2d2717aaa..719ef26d23c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1569,9 +1569,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); - lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); - lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; @@ -1615,7 +1613,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); - lockdep_unregister_key(&esw->mode_lock_key); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1893,17 +1890,6 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) -{ - if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || - (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && - dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) - return true; - - return false; -} - bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { @@ -2015,17 +2001,6 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw) } /** - * mlx5_esw_lock() - Take write lock on esw mode lock - * @esw: eswitch device. - */ -void mlx5_esw_lock(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_allowed(esw)) - return; - down_write(&esw->mode_lock); -} - -/** * mlx5_eswitch_get_total_vports - Get total vports of the eswitch * * @dev: Pointer to core device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index bac5160837c5..2754a732914d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -331,7 +331,6 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - struct lock_class_key mode_lock_key; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -518,8 +517,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1); bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); @@ -706,7 +703,6 @@ void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); -void mlx5_esw_lock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); @@ -724,7 +720,6 @@ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} -static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } @@ -733,9 +728,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; } -static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; } - static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ca1aba845dd6..84df0d56a2b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -148,7 +148,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_load_one(dev); + mlx5_load_one(dev, false); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c new file mode 100644 index 000000000000..443daf6e3d4b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lag.h" + +static char *get_str_mode_type(struct mlx5_lag *ldev) +{ + if (ldev->flags & MLX5_LAG_FLAG_ROCE) + return "roce"; + if (ldev->flags & MLX5_LAG_FLAG_SRIOV) + return "switchdev"; + if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH) + return "multipath"; + + return NULL; +} + +static int type_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + char *mode = NULL; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_mode_type(ldev); + mutex_unlock(&ldev->lock); + if (!mode) + return -EINVAL; + seq_printf(file, "%s\n", mode); + + return 0; +} + +static int port_sel_mode_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int ret = 0; + char *mode; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_port_sel_mode(ldev->flags); + else + ret = -EINVAL; + mutex_unlock(&ldev->lock); + if (ret || !mode) + return ret; + + seq_printf(file, "%s\n", mode); + return 0; +} + +static int state_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + seq_printf(file, "%s\n", active ? "active" : "disabled"); + return 0; +} + +static int flags_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool shared_fdb; + bool lag_active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) + shared_fdb = ldev->shared_fdb; + + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + return 0; +} + +static int mapping_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + u8 ports[MLX5_MAX_PORTS] = {}; + struct mlx5_lag *ldev; + bool hash = false; + bool lag_active; + int num_ports; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) { + if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + &num_ports); + hash = true; + } else { + for (i = 0; i < ldev->ports; i++) + ports[i] = ldev->v2p_map[i]; + num_ports = ldev->ports; + } + } + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + for (i = 0; i < num_ports; i++) { + if (hash) + seq_printf(file, "%d\n", ports[i] + 1); + else + seq_printf(file, "%d:%d\n", i + 1, ports[i]); + } + + return 0; +} + +static int members_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + } + mutex_unlock(&ldev->lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(type); +DEFINE_SHOW_ATTRIBUTE(port_sel_mode); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(flags); +DEFINE_SHOW_ATTRIBUTE(mapping); +DEFINE_SHOW_ATTRIBUTE(members); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) +{ + struct dentry *dbg; + + dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); + dev->priv.dbg.lag_debugfs = dbg; + + debugfs_create_file("type", 0444, dbg, dev, &type_fops); + debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops); + debugfs_create_file("state", 0444, dbg, dev, &state_fops); + debugfs_create_file("flags", 0444, dbg, dev, &flags_fops); + debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops); + debugfs_create_file("members", 0444, dbg, dev, &members_fops); +} + +void mlx5_ldev_remove_debugfs(struct dentry *dbg) +{ + debugfs_remove_recursive(dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 6cad3b72c133..b6dd9043061f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -53,8 +53,7 @@ enum { */ static DEFINE_SPINLOCK(lag_lock); -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2, bool shared_fdb, u8 flags) +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); @@ -63,8 +62,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) { - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); } else { MLX5_SET(lagc, lag_ctx, port_select_mode, MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT); @@ -73,8 +72,8 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, return mlx5_cmd_exec_in(dev, create_lag, in); } -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, + u8 *ports) { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); @@ -82,8 +81,8 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); MLX5_SET(modify_lag_in, in, field_select, 0x1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); return mlx5_cmd_exec_in(dev, modify_lag, in); } @@ -108,6 +107,75 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + +static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) +{ + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; + int i; + int j; + + if (flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } +} + static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr); static void mlx5_do_bond_work(struct work_struct *work); @@ -121,6 +189,7 @@ static void mlx5_ldev_free(struct kref *ref) mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); + mutex_destroy(&ldev->lock); kfree(ldev); } @@ -150,6 +219,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) } kref_init(&ldev->ref); + mutex_init(&ldev->lock); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -162,6 +232,8 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) if (err) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); + ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; return ldev; } @@ -171,7 +243,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (ldev->pf[i].netdev == ndev) return i; @@ -188,39 +260,72 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); } +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 *port1, u8 *port2) + u8 num_ports, + u8 buckets, + u8 *ports) { - bool p1en; - bool p2en; + int disabled[MLX5_MAX_PORTS] = {}; + int enabled[MLX5_MAX_PORTS] = {}; + int disabled_ports_num = 0; + int enabled_ports_num = 0; + int idx; + u32 rand; + int i; + int j; - p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled && - tracker->netdev_state[MLX5_LAG_P1].link_up; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + enabled[enabled_ports_num++] = i; + else + disabled[disabled_ports_num++] = i; + } - p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && - tracker->netdev_state[MLX5_LAG_P2].link_up; + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + */ + for (i = 0; i < num_ports; i++) + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } - *port1 = MLX5_LAG_EGRESS_PORT_1; - *port2 = MLX5_LAG_EGRESS_PORT_2; - if ((!p1en && !p2en) || (p1en && p2en)) + /* If all ports are disabled/enabled keep native mapping */ + if (enabled_ports_num == num_ports || + disabled_ports_num == num_ports) return; - if (p1en) - *port2 = MLX5_LAG_EGRESS_PORT_1; - else - *port1 = MLX5_LAG_EGRESS_PORT_2; + /* Go over the disabled ports and for each assign a random active port */ + for (i = 0; i < disabled_ports_num; i++) { + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } + } } static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { - return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop; + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].has_drop) + return true; + return false; } static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].has_drop) continue; @@ -233,12 +338,12 @@ static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, struct lag_tracker *tracker) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - struct mlx5_core_dev *inactive; - u8 v2p_port1, v2p_port2; - int inactive_idx; + u8 disabled_ports[MLX5_MAX_PORTS] = {}; + struct mlx5_core_dev *dev; + int disabled_index; + int num_disabled; int err; + int i; /* First delete the current drop rule so there won't be any dropped * packets @@ -248,58 +353,60 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, if (!ldev->tracker.has_inactive) return; - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2); + mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); - if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) { - inactive = dev1; - inactive_idx = MLX5_LAG_P2; - } else { - inactive = dev0; - inactive_idx = MLX5_LAG_P1; + for (i = 0; i < num_disabled; i++) { + disabled_index = disabled_ports[i]; + dev = ldev->pf[disabled_index].dev; + err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[disabled_index].has_drop = true; + else + mlx5_core_err(dev, + "Failed to create lag drop rule, error: %d", err); } - - err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch, - MLX5_VPORT_UPLINK); - if (!err) - ldev->pf[inactive_idx].has_drop = true; - else - mlx5_core_err(inactive, - "Failed to create lag drop rule, error: %d", err); } -static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) - return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2); - return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + return mlx5_lag_port_sel_modify(ldev, ports); + return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); } void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u8 v2p_port1, v2p_port2; + int idx; int err; + int i; + int j; - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, - &v2p_port2); + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); - if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || - v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { - err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2); - if (err) { - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); - return; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ports[idx] == ldev->v2p_map[idx]) + continue; + err = _mlx5_modify_lag(ldev, ports); + if (err) { + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + return; + } + memcpy(ldev->v2p_map, ports, sizeof(ports)); + + mlx5_lag_print_mapping(dev0, ldev, tracker, + ldev->flags); + break; } - ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; - ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; - mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); } if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && @@ -307,20 +414,47 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, mlx5_lag_drop_rule_setup(ldev, tracker); } -static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, - struct lag_tracker *tracker, u8 *flags) +#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4 +static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, + struct lag_tracker *tracker, u8 *flags) { - bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE); struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; - if (roce_lag || - !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) || - tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH) - return; - *flags |= MLX5_LAG_FLAG_HASH_BASED; + if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) { + /* Four ports are support only in hash mode */ + if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table)) + return -EINVAL; + *flags |= MLX5_LAG_FLAG_HASH_BASED; + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; + } + + return 0; +} + +static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, + struct lag_tracker *tracker, u8 *flags) +{ + struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; + + if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) + *flags |= MLX5_LAG_FLAG_HASH_BASED; + + return 0; +} + +static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, + struct lag_tracker *tracker, u8 *flags) +{ + bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE); + + if (roce_lag) + return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags); + return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags); } -static char *get_str_port_sel_mode(u8 flags) +char *get_str_port_sel_mode(u8 flags) { if (flags & MLX5_LAG_FLAG_HASH_BASED) return "hash"; @@ -336,12 +470,11 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], + mlx5_lag_print_mapping(dev0, ldev, tracker, flags); + mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", shared_fdb, get_str_port_sel_mode(flags)); - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, shared_fdb, flags); if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -377,13 +510,15 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], - &ldev->v2p_map[MLX5_LAG_P2]); - mlx5_lag_set_port_sel_mode(ldev, tracker, &flags); + err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags); + if (err) + return err; + + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + if (flags & MLX5_LAG_FLAG_HASH_BASED) { err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); + ldev->v2p_map); if (err) { mlx5_core_err(dev0, "Failed to create LAG port selection(%d)\n", @@ -455,25 +590,43 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) return 0; } +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) - return false; +#ifdef CONFIG_MLX5_ESWITCH + u8 mode; +#endif + int i; + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].dev) + return false; #ifdef CONFIG_MLX5_ESWITCH - return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev, - ldev->pf[MLX5_LAG_P2].dev); + mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev); + + if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS) + return false; + + for (i = 0; i < ldev->ports; i++) + if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + return false; + + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + return false; #else - return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) && - !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev)); + for (i = 0; i < ldev->ports; i++) + if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + return false; #endif + return true; } static void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].dev) continue; @@ -490,7 +643,7 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].dev) continue; @@ -510,6 +663,7 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) bool shared_fdb = ldev->shared_fdb; bool roce_lag; int err; + int i; roce_lag = __mlx5_lag_is_roce(ldev); @@ -520,7 +674,8 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - mlx5_nic_vport_disable_roce(dev1); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_disable_roce(ldev->pf[i].dev); } err = mlx5_deactivate_lag(ldev); @@ -557,6 +712,23 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) return false; } +static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) +{ + bool roce_lag = true; + int i; + + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && + ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE; +#endif + + return roce_lag; +} + static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; @@ -564,6 +736,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct lag_tracker tracker; bool do_bond, roce_lag; int err; + int i; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; @@ -580,14 +753,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (do_bond && !__mlx5_lag_is_active(ldev)) { bool shared_fdb = mlx5_shared_fdb_supported(ldev); - roce_lag = !mlx5_sriov_is_enabled(dev0) && - !mlx5_sriov_is_enabled(dev1); - -#ifdef CONFIG_MLX5_ESWITCH - roce_lag = roce_lag && - dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; -#endif + roce_lag = mlx5_lag_is_roce_lag(ldev); if (shared_fdb || roce_lag) mlx5_lag_remove_devices(ldev); @@ -604,7 +770,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - mlx5_nic_vport_enable_roce(dev1); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } else if (shared_fdb) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); @@ -636,31 +803,11 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } -static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev0) - mlx5_esw_lock(dev0->priv.eswitch); - if (dev1) - mlx5_esw_lock(dev1->priv.eswitch); -} - -static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev1) - mlx5_esw_unlock(dev1->priv.eswitch); - if (dev0) - mlx5_esw_unlock(dev0->priv.eswitch); -} - static void mlx5_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, bond_work); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int status; status = mlx5_dev_list_trylock(); @@ -669,15 +816,16 @@ static void mlx5_do_bond_work(struct work_struct *work) return; } + mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); mlx5_queue_bond_work(ldev, HZ); return; } - mlx5_lag_lock_eswitches(dev0, dev1); mlx5_do_bond(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); } @@ -691,7 +839,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, bool is_bonded, is_in_lag, mode_supported; bool has_inactive = 0; struct slave *slave; - int bond_status = 0; + u8 bond_status = 0; int num_slaves = 0; int changed = 0; int idx; @@ -722,7 +870,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_unlock(); /* None of this lagdev's netdevs are slaves of this master. */ - if (!(bond_status & 0x3)) + if (!(bond_status & GENMASK(ldev->ports - 1, 0))) return 0; if (lag_upper_info) { @@ -735,7 +883,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. */ - is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + is_in_lag = num_slaves == ldev->ports && + bond_status == GENMASK(ldev->ports - 1, 0); /* Lag mode must be activebackup or hash. */ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || @@ -864,7 +1013,7 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, { unsigned int fn = mlx5_get_dev_index(dev); - if (fn >= MLX5_MAX_PORTS) + if (fn >= ldev->ports) return; spin_lock(&lag_lock); @@ -880,7 +1029,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, int i; spin_lock(&lag_lock); - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (ldev->pf[i].netdev == netdev) { ldev->pf[i].netdev = NULL; break; @@ -894,24 +1043,23 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, { unsigned int fn = mlx5_get_dev_index(dev); - if (fn >= MLX5_MAX_PORTS) + if (fn >= ldev->ports) return; ldev->pf[fn].dev = dev; dev->priv.lag = ldev; } -/* Must be called with intf_mutex held */ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (ldev->pf[i].dev == dev) break; - if (i == MLX5_MAX_PORTS) + if (i == ldev->ports) return; ldev->pf[i].dev = NULL; @@ -924,12 +1072,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return 0; - - tmp_dev = mlx5_get_next_phys_dev(dev); + tmp_dev = mlx5_get_next_phys_dev_lag(dev); if (tmp_dev) ldev = tmp_dev->priv.lag; @@ -939,13 +1082,18 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to alloc lag dev\n"); return 0; } - } else { - if (ldev->mode_changes_in_progress) - return -EAGAIN; - mlx5_ldev_get(ldev); + mlx5_ldev_add_mdev(ldev, dev); + return 0; } + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + return -EAGAIN; + } + mlx5_ldev_get(ldev); mlx5_ldev_add_mdev(ldev, dev); + mutex_unlock(&ldev->lock); return 0; } @@ -958,15 +1106,19 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) if (!ldev) return; + /* mdev is being removed, might as well remove debugfs + * as early as possible. + */ + mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); recheck: - mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) { - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); msleep(100); goto recheck; } mlx5_ldev_remove_mdev(ldev, dev); - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); mlx5_ldev_put(ldev); } @@ -974,35 +1126,45 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { int err; + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) + return; + recheck: mlx5_dev_list_lock(); err = __mlx5_lag_dev_add_mdev(dev); + mlx5_dev_list_unlock(); + if (err) { - mlx5_dev_list_unlock(); msleep(100); goto recheck; } - mlx5_dev_list_unlock(); + mlx5_ldev_add_debugfs(dev); } -/* Must be called with intf_mutex held */ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev; + bool lag_is_active; ldev = mlx5_lag_dev(dev); if (!ldev) return; + mutex_lock(&ldev->lock); mlx5_ldev_remove_netdev(ldev, netdev); ldev->flags &= ~MLX5_LAG_FLAG_READY; - if (__mlx5_lag_is_active(ldev)) + lag_is_active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + + if (lag_is_active) mlx5_queue_bond_work(ldev, 0); } -/* Must be called with intf_mutex held */ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { @@ -1013,14 +1175,16 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, if (!ldev) return; + mutex_lock(&ldev->lock); mlx5_ldev_add_netdev(ldev, dev, netdev); - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (!ldev->pf[i].dev) break; - if (i >= MLX5_MAX_PORTS) + if (i >= ldev->ports) ldev->flags |= MLX5_LAG_FLAG_READY; + mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); } @@ -1097,8 +1261,6 @@ EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); void mlx5_lag_disable_change(struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev0; - struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; ldev = mlx5_lag_dev(dev); @@ -1106,16 +1268,13 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) return; mlx5_dev_list_lock(); - - dev0 = ldev->pf[MLX5_LAG_P1].dev; - dev1 = ldev->pf[MLX5_LAG_P2].dev; + mutex_lock(&ldev->lock); ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) { - mlx5_lag_lock_eswitches(dev0, dev1); + if (__mlx5_lag_is_active(ldev)) mlx5_disable_lag(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); - } + + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); } @@ -1127,9 +1286,9 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev) if (!ldev) return; - mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); ldev->mode_changes_in_progress--; - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); } @@ -1137,6 +1296,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; struct mlx5_lag *ldev; + int i; spin_lock(&lag_lock); ldev = mlx5_lag_dev(dev); @@ -1145,9 +1305,11 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ? - ldev->pf[MLX5_LAG_P1].netdev : - ldev->pf[MLX5_LAG_P2].netdev; + for (i = 0; i < ldev->ports; i++) + if (ldev->tracker.netdev_state[i].tx_enabled) + ndev = ldev->pf[i].netdev; + if (!ndev) + ndev = ldev->pf[ldev->ports - 1].netdev; } else { ndev = ldev->pf[MLX5_LAG_P1].netdev; } @@ -1166,18 +1328,21 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, { struct mlx5_lag *ldev; u8 port = 0; + int i; spin_lock(&lag_lock); ldev = mlx5_lag_dev(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; - if (ldev->pf[MLX5_LAG_P1].netdev == slave) - port = MLX5_LAG_P1; - else - port = MLX5_LAG_P2; + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + port = i; + break; + } + } - port = ldev->v2p_map[port]; + port = ldev->v2p_map[port * ldev->buckets]; unlock: spin_unlock(&lag_lock); @@ -1185,6 +1350,18 @@ unlock: } EXPORT_SYMBOL(mlx5_lag_get_slave_port); +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + return ldev->ports; +} +EXPORT_SYMBOL(mlx5_lag_get_num_ports); + struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) { struct mlx5_core_dev *peer_dev = NULL; @@ -1211,7 +1388,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, size_t *offsets) { int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_core_dev **mdev; struct mlx5_lag *ldev; int num_ports; int ret, i, j; @@ -1221,14 +1398,20 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, if (!out) return -ENOMEM; + mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); + if (!mdev) { + ret = -ENOMEM; + goto free_out; + } + memset(values, 0, sizeof(*values) * num_counters); spin_lock(&lag_lock); ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { - num_ports = MLX5_MAX_PORTS; - mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; - mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; + num_ports = ldev->ports; + for (i = 0; i < ldev->ports; i++) + mdev[i] = ldev->pf[i].dev; } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; @@ -1243,13 +1426,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, out); if (ret) - goto free; + goto free_mdev; for (j = 0; j < num_counters; ++j) values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); } -free: +free_mdev: + kvfree(mdev); +free_out: kvfree(out); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index cbf9a9003e55..46683b84ff84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -4,6 +4,9 @@ #ifndef __MLX5_LAG_H__ #define __MLX5_LAG_H__ +#include <linux/debugfs.h> + +#define MLX5_LAG_MAX_HASH_BUCKETS 16 #include "mlx5_core.h" #include "mp.h" #include "port_sel.h" @@ -45,9 +48,11 @@ struct lag_tracker { */ struct mlx5_lag { u8 flags; + u8 ports; + u8 buckets; int mode_changes_in_progress; bool shared_fdb; - u8 v2p_map[MLX5_MAX_PORTS]; + u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; struct kref ref; struct lag_func pf[MLX5_MAX_PORTS]; struct lag_tracker tracker; @@ -56,6 +61,8 @@ struct mlx5_lag { struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; + /* Protect lag fields/state changes */ + struct mutex lock; }; static inline struct mlx5_lag * @@ -85,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); +char *get_str_port_sel_mode(u8 flags); +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); +void mlx5_ldev_remove_debugfs(struct dentry *dbg); + #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 5be322528279..d3a3fe4ce670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -12,7 +12,8 @@ enum { static struct mlx5_flow_group * mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_definer *definer) + struct mlx5_flow_definer *definer, + u8 rules) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -25,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, MLX5_SET(create_flow_group_in, in, match_definer_id, mlx5_get_match_definer_id(definer)); MLX5_SET(create_flow_group_in, in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1); + MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1); MLX5_SET(create_flow_group_in, in, group_type, MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); @@ -36,7 +37,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer, - u8 port1, u8 port2) + u8 *ports) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_flow_table_attr ft_attr = {}; @@ -44,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; int err, i; + int idx; + int j; - ft_attr.max_fte = MLX5_MAX_PORTS; + ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); @@ -61,7 +64,8 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, } lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, - lag_definer->definer); + lag_definer->definer, + ft_attr.max_fte); if (IS_ERR(lag_definer->fg)) { err = PTR_ERR(lag_definer->fg); goto destroy_ft; @@ -70,19 +74,25 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.flags |= FLOW_ACT_NO_APPEND; - for (i = 0; i < MLX5_MAX_PORTS; i++) { - u8 affinity = i == 0 ? port1 : port2; - - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, - vhca_id); - lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft, - NULL, &flow_act, - &dest, 1); - if (IS_ERR(lag_definer->rules[i])) { - err = PTR_ERR(lag_definer->rules[i]); - while (i--) - mlx5_del_flow_rules(lag_definer->rules[i]); - goto destroy_fg; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + u8 affinity; + + idx = i * ldev->buckets + j; + affinity = ports[idx]; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); + while (i--) + while (j--) + mlx5_del_flow_rules(lag_definer->rules[idx]); + goto destroy_fg; + } } } @@ -279,8 +289,7 @@ static int mlx5_lag_set_definer(u32 *match_definer_mask, static struct mlx5_lag_definer * mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, - enum mlx5_traffic_types tt, bool tunnel, u8 port1, - u8 port2) + enum mlx5_traffic_types tt, bool tunnel, u8 *ports) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_lag_definer *lag_definer; @@ -308,7 +317,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, goto free_mask; } - err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2); + err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports); if (err) goto destroy_match_definer; @@ -329,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int idx; int i; + int j; - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_del_flow_rules(lag_definer->rules[i]); + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); + } + } mlx5_destroy_flow_group(lag_definer->fg); mlx5_destroy_flow_table(lag_definer->ft); mlx5_destroy_match_definer(dev, lag_definer->definer); @@ -356,7 +371,7 @@ static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev) static int mlx5_lag_create_definers(struct mlx5_lag *ldev, enum netdev_lag_hash hash_type, - u8 port1, u8 port2) + u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_lag_definer *lag_definer; @@ -364,7 +379,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev, for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, - false, port1, port2); + false, ports); if (IS_ERR(lag_definer)) { err = PTR_ERR(lag_definer); goto destroy_definers; @@ -376,7 +391,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev, lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, - true, port1, port2); + true, ports); if (IS_ERR(lag_definer)) { err = PTR_ERR(lag_definer); goto destroy_definers; @@ -513,13 +528,13 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) } int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, - enum netdev_lag_hash hash_type, u8 port1, u8 port2) + enum netdev_lag_hash hash_type, u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; int err; set_tt_map(port_sel, hash_type); - err = mlx5_lag_create_definers(ldev, hash_type, port1, port2); + err = mlx5_lag_create_definers(ldev, hash_type, ports); if (err) return err; @@ -543,52 +558,62 @@ destroy_definers: return err; } -static int -mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, - struct mlx5_lag_definer **definers, - u8 port1, u8 port2) +static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer *def, + u8 *ports) { - struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_flow_destination dest = {}; + int idx; int err; - int tt; + int i; + int j; dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { - struct mlx5_flow_handle **rules = definers[tt]->rules; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ldev->v2p_map[i] == ports[i]) + continue; - if (ldev->v2p_map[MLX5_LAG_P1] != port1) { - dest.vport.vhca_id = - MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id); - err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1], - &dest, NULL); + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, + vhca_id); + err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) return err; } + } - if (ldev->v2p_map[MLX5_LAG_P2] != port2) { - dest.vport.vhca_id = - MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id); - err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2], - &dest, NULL); - if (err) - return err; - } + return 0; +} + +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports); + if (err) + return err; } return 0; } -int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2) +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; int err; err = mlx5_lag_modify_definers_destinations(ldev, port_sel->outer.definers, - port1, port2); + ports); if (err) return err; @@ -597,7 +622,7 @@ int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2) return mlx5_lag_modify_definers_destinations(ldev, port_sel->inner.definers, - port1, port2); + ports); } void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index 6d15b28a42fc..5ec3af2a3ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -10,7 +10,10 @@ struct mlx5_lag_definer { struct mlx5_flow_definer *definer; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct mlx5_flow_handle *rules[MLX5_MAX_PORTS]; + /* Each port has ldev->buckets number of rules and they are arrange in + * [port * buckets .. port * buckets + buckets) locations + */ + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; }; struct mlx5_lag_ttc { @@ -27,22 +30,20 @@ struct mlx5_lag_port_sel { #ifdef CONFIG_MLX5_ESWITCH -int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2); +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports); void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev); int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, - enum netdev_lag_hash hash_type, u8 port1, - u8 port2); + enum netdev_lag_hash hash_type, u8 *ports); #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, enum netdev_lag_hash hash_type, - u8 port1, u8 port2) + u8 *ports) { return 0; } -static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, - u8 port2) +static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index bced2efe9bef..adefde3ea941 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -14,7 +14,7 @@ static LIST_HEAD(devcom_list); struct mlx5_devcom_component { struct { void *data; - } device[MLX5_MAX_PORTS]; + } device[MLX5_DEVCOM_PORTS_SUPPORTED]; mlx5_devcom_event_handler_t handler; struct rw_semaphore sem; @@ -25,7 +25,7 @@ struct mlx5_devcom_list { struct list_head list; struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; - struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; + struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED]; }; struct mlx5_devcom { @@ -74,13 +74,15 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; + if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; sguid0 = mlx5_query_nic_system_image_guid(dev); list_for_each_entry(iter, &devcom_list, list) { struct mlx5_core_dev *tmp_dev = NULL; idx = -1; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { if (iter->devs[i]) tmp_dev = iter->devs[i]; else @@ -134,11 +136,11 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) kfree(devcom); - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (priv->devs[i]) break; - if (i != MLX5_MAX_PORTS) + if (i != MLX5_DEVCOM_PORTS_SUPPORTED) return; list_del(&priv->list); @@ -191,7 +193,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_write(&comp->sem); - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (i != devcom->idx && comp->device[i].data) { err = comp->handler(event, comp->device[i].data, event_data); @@ -239,7 +241,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, return NULL; } - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (i != devcom->idx) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 939d5bf1581b..94313c18bb64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,6 +6,8 @@ #include <linux/mlx5/driver.h> +#define MLX5_DEVCOM_PORTS_SUPPORTED 2 + enum mlx5_devcom_components { MLX5_DEVCOM_ESW_OFFLOADS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index c1df0d3595d8..d758848d34d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -10,6 +10,7 @@ struct mlx5_timeouts { static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000, [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, [MLX5_TO_FW_INIT_MS] = 2000, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 1c42ead782fa..257c03eeab36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -7,6 +7,7 @@ enum mlx5_timeouts_types { /* pre init timeouts (not read from FW) */ MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS, MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, MLX5_TO_FW_PRE_INIT_WAIT_MS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 35e48ef04845..84f75aa25214 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -189,7 +189,8 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, fw_initializing = ioread32be(&dev->iseg->initializing); if (!(fw_initializing >> 31)) break; - if (time_after(jiffies, end)) { + if (time_after(jiffies, end) || + test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { err = -EBUSY; break; } @@ -1002,7 +1003,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) { int err; @@ -1017,11 +1018,11 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT), + err = wait_fw_init(dev, timeout, mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); if (err) { mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", - mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + timeout); return err; } @@ -1271,7 +1272,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, true); + err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); if (err) goto err_function; @@ -1335,9 +1336,10 @@ out: mutex_unlock(&dev->intf_state_mutex); } -int mlx5_load_one(struct mlx5_core_dev *dev) +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) { int err = 0; + u64 timeout; mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1347,7 +1349,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev) /* remove any previous indication of internal error */ dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, false); + if (recovery) + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); + else + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); + err = mlx5_function_setup(dev, timeout); if (err) goto err_function; @@ -1602,6 +1608,7 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_crdump_disable(dev); @@ -1717,7 +1724,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) mlx5_pci_trace(dev, "Enter, loading driver..\n"); - err = mlx5_load_one(dev); + err = mlx5_load_one(dev, false); mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, !err ? "recovered" : "Failed"); @@ -1785,6 +1792,7 @@ static void shutdown(struct pci_dev *pdev) int err; mlx5_core_info(dev, "Shutdown was called\n"); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev); @@ -1804,7 +1812,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); } static const struct pci_device_id mlx5_core_pci_table[] = { @@ -1849,7 +1857,7 @@ int mlx5_recover_device(struct mlx5_core_dev *dev) return -EIO; } - return mlx5_load_one(dev); + return mlx5_load_one(dev, true); } static struct pci_driver mlx5_core_driver = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a9b2d6ead542..484cb1e4fc7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -210,6 +210,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); @@ -290,7 +291,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev); -int mlx5_load_one(struct mlx5_core_dev *dev); +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ff47d49d8be4..d6bac3976913 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -84,7 +84,7 @@ enum mlx5_sqp_t { }; enum { - MLX5_MAX_PORTS = 2, + MLX5_MAX_PORTS = 4, }; enum { @@ -558,6 +558,7 @@ struct mlx5_debugfs_entries { struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; struct dentry *pages_debugfs; + struct dentry *lag_debugfs; }; struct mlx5_ft_pool; @@ -632,6 +633,7 @@ enum mlx5_device_state { enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = BIT(0), + MLX5_BREAK_FW_WAIT = BIT(1), }; enum mlx5_pci_status { @@ -1141,6 +1143,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int num_counters, size_t *offsets); struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, |