diff options
author | Mark Bloch <mbloch@nvidia.com> | 2021-08-04 02:19:50 +0300 |
---|---|---|
committer | Saeed Mahameed <saeedm@nvidia.com> | 2021-08-05 23:49:24 +0300 |
commit | c446d9da64075b1c8b55df9acaae72c7686c59a5 (patch) | |
tree | d9e6346938428437ec300b3d6e07dca658abbafc /drivers/infiniband/hw/mlx5/ib_rep.c | |
parent | 979bf468fc543444eb750c8f8817407f509bd504 (diff) | |
download | linux-c446d9da64075b1c8b55df9acaae72c7686c59a5.tar.xz |
RDMA/mlx5: Add shared FDB support
Shared FDB allows creating a single RDMA device that holds representors
from both eswitches. As shared FDB is only active when both uplink
representors are enslaved, there is a single RDMA port that represents
both uplinks.
The number of ports is the number of vports on both eswitches minus one,
as we only need one port for both uplinks.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/ib_rep.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 75 |
1 files changed, 69 insertions, 6 deletions
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index bf5a6e4d1c03..52821485371a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -8,13 +8,15 @@ #include "srq.h" static int -mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep, + int vport_index) { struct mlx5_ib_dev *ibdev; - int vport_index; ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); - vport_index = rep->vport_index; + if (!ibdev) + return -EINVAL; ibdev->port[vport_index].rep = rep; rep->rep_data[REP_IB].priv = ibdev; @@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; } +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { u32 num_ports = mlx5_eswitch_get_total_vports(dev); const struct mlx5_ib_profile *profile; + struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + u32 peer_num_ports; int vport_index; int ret; + vport_index = rep->vport_index; + + if (mlx5_lag_is_shared_fdb(dev)) { + peer_dev = mlx5_lag_get_peer_mdev(dev); + peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); + if (mlx5_lag_is_master(dev)) { + /* Only 1 ib port is the representor for both uplinks */ + num_ports += peer_num_ports - 1; + } else { + if (rep->vport == MLX5_VPORT_UPLINK) + return 0; + vport_index += peer_num_ports; + dev = peer_dev; + } + } + if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(dev, rep); + return mlx5_ib_set_vport_rep(dev, rep, vport_index); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto fail_add; rep->rep_data[REP_IB].priv = ibdev; + if 
(mlx5_lag_is_shared_fdb(dev)) + mlx5_ib_register_peer_vport_reps(dev); return 0; @@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) static void mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { + struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw); struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); + int vport_index = rep->vport_index; struct mlx5_ib_port *port; - port = &dev->port[rep->vport_index]; + if (WARN_ON(!mdev)) + return; + + if (mlx5_lag_is_shared_fdb(mdev) && + !mlx5_lag_is_master(mdev)) { + struct mlx5_core_dev *peer_mdev; + + if (rep->vport == MLX5_VPORT_UPLINK) + return; + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + vport_index += mlx5_eswitch_get_total_vports(peer_mdev); + } + + if (!dev) + return; + + port = &dev->port[vport_index]; write_lock(&port->roce.netdev_lock); port->roce.netdev = NULL; write_unlock(&port->roce.netdev_lock); rep->rep_data[REP_IB].priv = NULL; port->rep = NULL; - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { + struct mlx5_core_dev *peer_mdev; + struct mlx5_eswitch *esw; + + if (mlx5_lag_is_shared_fdb(mdev)) { + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + } __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + } } static const struct mlx5_eswitch_rep_ops rep_ops = { @@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { .get_proto_dev = mlx5_ib_rep_to_dev, }; +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev); + struct mlx5_eswitch *esw; + + if (!peer_mdev) + return; + + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); +} + struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { |