author    Linus Torvalds <torvalds@linux-foundation.org>  2024-09-24 21:48:00 +0300
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-09-24 21:48:00 +0300
commit    54d7e8190ecfe72ff0dab96545e782f7298cb69a
tree      1ffd6c6083413871ebc06f30e649e1670726dbe8 /drivers/net/ethernet/mellanox/mlx5/core
parent    6fa6588e5964473356f0e2a02093ea42a5b3fd56
parent    70920941923316b760bc7a804eb3d49a126d8712
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "Usual collection of small improvements and fixes, nothing especially
  stands out to me here. The new multipath PCI feature is a sign of things
  to come, I think we will see more of this in the next 10 years. Broadcom
  and HNS continue to update their drivers for their new HW generations.

  Summary:

   - Bug fixes and minor improvements in cxgb4, siw, mlx5, rxe, efa, rts,
     hfi, erdma, hns, irdma

   - Code cleanups/typos/etc. Tidy alloc_ordered_workqueue() calls

   - Multipath PCI for mlx5

   - Variable size work queue, SRQ changes, and relaxed ordering for new
     bnxt HW

   - New ODP fault resolution FW protocol in mlx5

   - New 'rdma monitor' netlink mechanism"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (99 commits)
  RDMA/bnxt_re: Remove the unused variable en_dev
  RDMA/nldev: Add missing break in rdma_nl_notify_err_msg()
  RDMA/irdma: fix error message in irdma_modify_qp_roce()
  RDMA/cxgb4: Added NULL check for lookup_atid
  RDMA/hns: Fix ah error counter in sw stat not increasing
  RDMA/bnxt_re: Recover the device when FW error is detected
  RDMA/bnxt_re: Group all operations under add_device and remove_device
  RDMA/bnxt_re: Use the aux device for L2 ULP callbacks
  RDMA/bnxt_re: Change aux driver data to en_info to hold more information
  RDMA/nldev: Expose whether RDMA monitoring is supported
  RDMA/nldev: Add support for RDMA monitoring
  RDMA/mlx5: Use IB set_netdev and get_netdev functions
  RDMA/device: Remove optimization in ib_device_get_netdev()
  RDMA/mlx5: Initialize phys_port_cnt earlier in RDMA device creation
  RDMA/mlx5: Obtain upper net device only when needed
  RDMA/mlx5: Check RoCE LAG status before getting netdev
  RDMA/mlx5: Consider the query_vuid cap for data_direct
  net/mlx5: Handle memory scheme ODP capabilities
  RDMA/mlx5: Add implicit MR handling to ODP memory scheme
  RDMA/mlx5: Add handling for memory scheme page fault events
  ...
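As an aside on the "Tidy alloc_ordered_workqueue() calls" item in the summary above: the tidy-up pattern generally means handing the printf-style queue name straight to the allocator instead of formatting it into a temporary buffer first. A minimal sketch of the resulting shape; the helper name and device parameter here are hypothetical, not taken from the series:

#include <linux/device.h>
#include <linux/workqueue.h>

/* Hypothetical example: one ordered workqueue whose name is derived from the
 * device; alloc_ordered_workqueue() does the formatting itself. The caller
 * must check for NULL and eventually call destroy_workqueue().
 */
static struct workqueue_struct *example_create_wq(struct device *dev)
{
	return alloc_ordered_workqueue("example_wq_%s", WQ_MEM_RECLAIM,
				       dev_name(dev));
}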
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c  76
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c     54
2 files changed, 75 insertions(+), 55 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index cf8045b92689..8577db3308cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -445,6 +445,34 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}
+static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
+{
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+
+ if (!ldev)
+ goto unlock;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->tracker.netdev_state[i].tx_enabled)
+ ndev = ldev->pf[i].netdev;
+ if (!ndev)
+ ndev = ldev->pf[ldev->ports - 1].netdev;
+
+ if (ndev)
+ dev_hold(ndev);
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return ndev;
+}
+
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker)
{
@@ -477,9 +505,18 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
}
}
- if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
- !(ldev->mode == MLX5_LAG_MODE_ROCE))
- mlx5_lag_drop_rule_setup(ldev, tracker);
+ if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
+
+ if (!(ldev->mode == MLX5_LAG_MODE_ROCE))
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+ /* Only sriov and roce lag should have tracker->tx_type set, so
+ * there is no need to check the mode.
+ */
+ blocking_notifier_call_chain(&dev0->priv.lag_nh,
+ MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
+ ndev);
+ }
}
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
@@ -613,6 +650,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
mlx5_core_err(dev0,
"Failed to deactivate RoCE LAG; driver restart required\n");
}
+ BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
return err;
}
@@ -1492,38 +1530,6 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
mlx5_queue_bond_work(ldev, 0);
}
-struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
-{
- struct net_device *ndev = NULL;
- struct mlx5_lag *ldev;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&lag_lock, flags);
- ldev = mlx5_lag_dev(dev);
-
- if (!(ldev && __mlx5_lag_is_roce(ldev)))
- goto unlock;
-
- if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
- for (i = 0; i < ldev->ports; i++)
- if (ldev->tracker.netdev_state[i].tx_enabled)
- ndev = ldev->pf[i].netdev;
- if (!ndev)
- ndev = ldev->pf[ldev->ports - 1].netdev;
- } else {
- ndev = ldev->pf[MLX5_LAG_P1].netdev;
- }
- if (ndev)
- dev_hold(ndev);
-
-unlock:
- spin_unlock_irqrestore(&lag_lock, flags);
-
- return ndev;
-}
-EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
-
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
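Note on the lag.c change as a whole: the exported mlx5_lag_get_roce_netdev() helper is removed, and consumers are instead notified through the new per-device blocking notifier head dev->priv.lag_nh, with MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE carrying the currently tx-enabled netdev as the payload. A minimal sketch of a listener on that chain, using the standard kernel notifier API; the handler and its registration site are hypothetical:

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>

/* Hypothetical listener for the lowerstate event emitted from
 * mlx5_modify_lag() above; data is the netdev picked by
 * mlx5_lag_active_backup_get_netdev(), which takes a dev_hold()
 * reference before the call chain runs.
 */
static int example_lag_lowerstate_event(struct notifier_block *nb,
					unsigned long event, void *data)
{
	struct net_device *ndev = data;

	if (event != MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE)
		return NOTIFY_DONE;

	pr_debug("active-backup LAG lower state moved to %s\n",
		 ndev ? netdev_name(ndev) : "none");
	return NOTIFY_OK;
}

static struct notifier_block example_lag_nb = {
	.notifier_call = example_lag_lowerstate_event,
};

/* Registration against the chain initialized in mlx5_create_lag() above:
 *	blocking_notifier_chain_register(&mdev->priv.lag_nh, &example_lag_nb);
 */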
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8b0abd61eca6..220a9ac75c8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -454,8 +454,8 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
{
+ bool do_set = false, mem_page_fault = false;
void *set_hca_cap;
- bool do_set = false;
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
@@ -470,6 +470,17 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
MLX5_ST_SZ_BYTES(odp_cap));
+ /* For best performance, enable memory scheme ODP only when
+ * it has page prefetch enabled.
+ */
+ if (MLX5_CAP_ODP_MAX(dev, mem_page_fault) &&
+ MLX5_CAP_ODP_MAX(dev, memory_page_fault_scheme_cap.page_prefetch)) {
+ mem_page_fault = true;
+ do_set = true;
+ MLX5_SET(odp_cap, set_hca_cap, mem_page_fault, mem_page_fault);
+ goto set;
+ }
+
#define ODP_CAP_SET_MAX(dev, field) \
do { \
u32 _res = MLX5_CAP_ODP_MAX(dev, field); \
@@ -479,25 +490,28 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
} \
} while (0)
- ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
- ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
- ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
- ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
-
- if (!do_set)
- return 0;
-
- return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.ud_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.rc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.atomic);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.atomic);
+
+set:
+ if (do_set)
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+ mlx5_core_dbg(dev, "Using ODP %s scheme\n",
+ mem_page_fault ? "memory" : "transport");
+ return err;
}
static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
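For readers of the main.c hunks: the ODP_CAP_SET_MAX() helper is only partially visible above (its middle falls between the two hunks), but from the visible pieces it reads the per-field maximum capability and, when non-zero, marks do_set and programs the field into set_hca_cap. A spelled-out expansion for one of the new transport-scheme fields, with the unshown middle of the macro reconstructed and therefore to be treated as illustrative only:

/* Approximate expansion of
 *	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send);
 * inside handle_hca_cap_odp(); do_set and set_hca_cap are the function's
 * locals, and the if-block is not shown in the hunks above.
 */
do {
	u32 _res = MLX5_CAP_ODP_MAX(dev,
			transport_page_fault_scheme_cap.xrc_odp_caps.send);

	if (_res) {
		do_set = true;
		MLX5_SET(odp_cap, set_hca_cap,
			 transport_page_fault_scheme_cap.xrc_odp_caps.send,
			 _res);
	}
} while (0);

Either way, the function now converges on the shared set: label, so both the memory-scheme fast path and the transport-scheme fallback issue the same set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP) call when anything changed, and the "Using ODP %s scheme" debug line reports which scheme was selected.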