From 57cda166bbe045151d46b2d1133fdf4afccb90ed Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 2 Jan 2018 16:19:28 +0200 Subject: net/mlx5: Add DCT command interface Add a missing command interface to work with a DCT. It includes creating a DCT, destroying it, and getting events for it. Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 125 ++++++++++++++++++++++++--- 2 files changed, 121 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 60771865c99c..7d3d503fa675 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -417,7 +417,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; mlx5_cq_completion(dev, cqn); break; - + case MLX5_EVENT_TYPE_DCT_DRAINED: + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + mlx5_rsc_event(dev, rsn, eqe->type); + break; case MLX5_EVENT_TYPE_PATH_MIG: case MLX5_EVENT_TYPE_COMM_EST: case MLX5_EVENT_TYPE_SQ_DRAINED: @@ -715,6 +719,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, fpga)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR); + if (MLX5_CAP_GEN_MAX(dev, dct)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index db9e665ab104..0f5ddd22927d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -98,6 +98,11 @@ static u64 sq_allowed_event_types(void) return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR); } +static u64 dct_allowed_event_types(void) +{ + return BIT(MLX5_EVENT_TYPE_DCT_DRAINED); +} + static bool is_event_type_allowed(int rsc_type, int event_type) { switch (rsc_type) { @@ -107,6 +112,8 @@ static bool is_event_type_allowed(int rsc_type, int event_type) return BIT(event_type) & rq_allowed_event_types(); case MLX5_EVENT_QUEUE_TYPE_SQ: return BIT(event_type) & sq_allowed_event_types(); + case MLX5_EVENT_QUEUE_TYPE_DCT: + return BIT(event_type) & dct_allowed_event_types(); default: WARN(1, "Event arrived for unknown resource type"); return false; @@ -116,6 +123,7 @@ static bool is_event_type_allowed(int rsc_type, int event_type) void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) { struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_dct *dct; struct mlx5_core_qp *qp; if (!common) @@ -134,7 +142,11 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) qp = (struct mlx5_core_qp *)common; qp->event(qp, event_type); break; - + case MLX5_RES_DCT: + dct = (struct mlx5_core_dct *)common; + if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED) + complete(&dct->drained); + break; default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } @@ -142,9 +154,9 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) mlx5_core_put_rsc(common); } -static int create_qprqsq_common(struct mlx5_core_dev *dev, - struct mlx5_core_qp *qp, - int rsc_type) +static int create_resource_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + int
rsc_type) { struct mlx5_qp_table *table = &dev->priv.qp_table; int err; @@ -165,8 +177,8 @@ static int create_qprqsq_common(struct mlx5_core_dev *dev, return 0; } -static void destroy_qprqsq_common(struct mlx5_core_dev *dev, - struct mlx5_core_qp *qp) +static void destroy_resource_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) { struct mlx5_qp_table *table = &dev->priv.qp_table; unsigned long flags; @@ -179,6 +191,40 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev, wait_for_completion(&qp->common.free); } +int mlx5_core_create_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; + u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + init_completion(&dct->drained); + MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_warn(dev, "create DCT failed, ret %d\n", err); + return err; + } + + qp->qpn = MLX5_GET(create_dct_out, out, dctn); + err = create_resource_common(dev, qp, MLX5_RES_DCT); + if (err) + goto err_cmd; + + return 0; +err_cmd: + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + mlx5_cmd_exec(dev, (void *)&din, sizeof(din), + (void *)&dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_create_dct); + int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *in, int inlen) @@ -197,7 +243,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); - err = create_qprqsq_common(dev, qp, MLX5_RES_QP); + err = create_resource_common(dev, qp, MLX5_RES_QP); if (err) goto err_cmd; @@ -220,6 +266,47 @@ err_cmd: } EXPORT_SYMBOL_GPL(mlx5_core_create_qp); +static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); + MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); +} + +int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); + return err; + } + } + wait_for_completion(&dct->drained); +destroy: + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); + int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { @@ -229,7 +316,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, mlx5_debug_qp_remove(dev, qp); - destroy_qprqsq_common(dev, qp); + destroy_resource_common(dev, qp); MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); @@
-405,6 +492,20 @@ int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, } EXPORT_SYMBOL_GPL(mlx5_core_qp_query); +int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT); + MLX5_SET(query_dct_in, in, dctn, qp->qpn); + + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)out, outlen); +} +EXPORT_SYMBOL_GPL(mlx5_core_dct_query); + int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) { u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; @@ -441,7 +542,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; rq->qpn = rqn; - err = create_qprqsq_common(dev, rq, MLX5_RES_RQ); + err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) goto err_destroy_rq; @@ -457,7 +558,7 @@ EXPORT_SYMBOL(mlx5_core_create_rq_tracked); void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { - destroy_qprqsq_common(dev, rq); + destroy_resource_common(dev, rq); mlx5_core_destroy_rq(dev, rq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); @@ -473,7 +574,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; sq->qpn = sqn; - err = create_qprqsq_common(dev, sq, MLX5_RES_SQ); + err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) goto err_destroy_sq; @@ -489,7 +590,7 @@ EXPORT_SYMBOL(mlx5_core_create_sq_tracked); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { - destroy_qprqsq_common(dev, sq); + destroy_resource_common(dev, sq); mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); -- cgit v1.2.3 From dd44572aebee9260fa1d90569d20d8bab28f90ae Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 2 Jan 2018 16:19:29 +0200 Subject: net/mlx5: Enable DC transport Enable DC transport in the firmware to provide its functionality. Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5f323442cc5a..1292aecb09f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -552,6 +552,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) cache_line_128byte, cache_line_size() == 128 ? 1 : 0); + if (MLX5_CAP_GEN_MAX(dev, dct)) + MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); -- cgit v1.2.3 From 734dc065fc41f6143ff88225aa5d335cb1e0f6aa Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:31 +0200 Subject: net/mlx5: Fix race for multiple RoCE enable There are two potential problems with the existing implementation. 1. Enable and disable can race after the atomic operations. 2. If a command fails the refcount is left in an inconsistent state. Introduce a lock and perform error checking. 
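For illustration, the old flow reduces to the following sketch (simplified pseudo driver code; fw_set_roce() stands in for mlx5_nic_vport_update_roce_state(), which this patch does not rename):

        /* old, racy pattern (sketch) */
        int enable_roce(void)
        {
                if (atomic_inc_return(&roce_en) != 1)
                        return 0;            /* count claims RoCE is on... */
                return fw_set_roce(ENABLED); /* ...but the FW command has not
                                              * completed yet (problem 1), and
                                              * if it fails the count stays
                                              * bumped (problem 2) */
        }

Serializing the counter and the firmware command under a single mutex, as done below, closes both windows.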
Fixes: a6f7d2aff623 ("net/mlx5: Add support for multiple RoCE enable") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 33 ++++++++++++++++++++----- include/linux/mlx5/driver.h | 2 +- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index d653b0025b13..916523103f16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,9 @@ #include <linux/mlx5/vport.h> #include "mlx5_core.h" +/* Mutex to hold while enabling or disabling RoCE */ +static DEFINE_MUTEX(mlx5_roce_en_lock); + static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u32 *out, int outlen) { @@ -988,17 +991,35 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) { - if (atomic_inc_return(&mdev->roce.roce_en) != 1) - return 0; - return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (!mdev->roce.roce_en) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + + if (!err) + mdev->roce.roce_en++; + mutex_unlock(&mlx5_roce_en_lock); + + return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) { - if (atomic_dec_return(&mdev->roce.roce_en) != 0) - return 0; - return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (mdev->roce.roce_en) { + mdev->roce.roce_en--; + if (mdev->roce.roce_en == 0) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + + if (err) + mdev->roce.roce_en++; + } + mutex_unlock(&mlx5_roce_en_lock); + return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0776554f18dc..5b0443c9d337 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -835,7 +835,7 @@ struct mlx5_core_dev { struct mlx5e_resources mlx5e_res; struct { struct mlx5_rsvd_gids reserved_gids; - atomic_t roce_en; + u32 roce_en; } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -- cgit v1.2.3 From 8737f818ca3b8ef7c9945525af7df128e1be4575 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:32 +0200 Subject: net/mlx5: Set software owner ID during init HCA Generate a unique 128-bit identifier for each host and pass that value to firmware in the INIT_HCA command if it reports the sw_owner_id capability. Each device bound to the mlx5_core driver will have the same software owner ID. In subsequent patches mlx5_core devices will be bound via a new VPort command so that they can operate together under a single InfiniBand device. Only devices that have the same software owner ID can be bound, to prevent traffic intended for one host arriving at another. The INIT_HCA command length was expanded by 128 bits. The command length is provided as an input to FW commands, so older FW does not have a problem receiving this command in the new longer form. 
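Condensed from the hunks below into one sketch, the flow is: generate the random ID once in the module init() path, then copy it into the new INIT_HCA field when the device reports the capability (in mlx5_cmd_init_hca()):

        static u32 sw_owner_id[4];              /* one 128-bit ID per host */

        /* module init */
        get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));

        /* mlx5_cmd_init_hca() */
        if (MLX5_CAP_GEN(dev, sw_owner_id)) {
                for (i = 0; i < 4; i++)
                        MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
                                       sw_owner_id[i]);
        }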
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 10 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/device.h | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 5 ++++- 5 files changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 5ef1b56b6a96..9d11e92fb541 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -195,12 +195,20 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return 0; } -int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) { u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; + int i; MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + + if (MLX5_CAP_GEN(dev, sw_owner_id)) { + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i, + sw_owner_id[i]); + } + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1292aecb09f2..e382a3ca759e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -75,6 +75,8 @@ static unsigned int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); +static u32 sw_owner_id[4]; + enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, @@ -1055,7 +1057,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_cmd_init_hca(dev); + err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); goto err_pagealloc_stop; @@ -1577,6 +1579,8 @@ static int __init init(void) { int err; + get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); + mlx5_core_verify_params(); mlx5_register_debugfs(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index ff4a0b889a6f..b05868728da7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -86,7 +86,7 @@ enum { int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); -int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9aee835b7393..e5258ee4e38b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -79,6 +79,11 @@ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) +#define MLX5_ARRAY_SET(typ, p, fld, idx, v) do { \ + BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 32); \ + MLX5_SET(typ, p, fld[idx], v); \ +} while (0) + #define MLX5_SET_TO_ONES(typ, p, fld) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ diff --git 
a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 38a7577a9ce7..b1c81d7a86cb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1066,7 +1066,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5f8[0x3]; u8 log_max_xrq[0x5]; - u8 reserved_at_600[0x200]; + u8 reserved_at_600[0x1e]; + u8 sw_owner_id[0x1]; + u8 reserved_at_61f[0x1e1]; }; enum mlx5_flow_destination_type { @@ -5531,6 +5533,7 @@ struct mlx5_ifc_init_hca_in_bits { u8 op_mod[0x10]; u8 reserved_at_40[0x40]; + u8 sw_owner_id[4][0x20]; }; struct mlx5_ifc_init2rtr_qp_out_bits { -- cgit v1.2.3 From 32f69e4be269739c3850cd20f1a3322e95c1145f Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:36 +0200 Subject: {net, IB}/mlx5: Manage port association for multiport RoCE When mlx5_ib_add is called determine if the mlx5 core device being added is capable of dual port RoCE operation. If it is, determine whether it is a master device or a slave device using the num_vhca_ports and affiliate_nic_vport_criteria capabilities. If the device is a slave, attempt to find a master device to affiliate it with. Devices that can be affiliated will share a system image guid. If none are found place it on a list of unaffiliated ports. If a master is found bind the port to it by configuring the port affiliation in the NIC vport context. Similarly when mlx5_ib_remove is called determine the port type. If it's a slave port, unaffiliate it from the master device, otherwise just remove it from the unaffiliated port list. The IB device is registered as a multiport device, even if a 2nd port is not available for affiliation. When the 2nd port is affiliated later the GID cache must be refreshed in order to get the default GIDs for the 2nd port in the cache. Export roce_rescan_device to provide a mechanism to refresh the cache after a new port is bound. In a multiport configuration all IB object (QP, MR, PD, etc) related commands should flow through the master mlx5_core_dev; other commands must be sent to the slave port's mlx5_core_dev. An interface is provided to get the correct mdev for non IB object commands. 
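For instance, a handler issuing such a command is expected to follow the get/put pattern of the helpers introduced below (sketch; error handling trimmed):

        struct mlx5_core_dev *mdev;
        u8 native_port_num;

        mdev = mlx5_ib_get_native_port_mdev(ibdev, ib_port_num,
                                            &native_port_num);
        if (!mdev)
                return -ENODEV;  /* port is being unaffiliated right now */
        /* ... send the non IB object command through mdev ... */
        mlx5_ib_put_native_port_mdev(ibdev, ib_port_num);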
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 2 +- drivers/infiniband/core/core_priv.h | 1 - drivers/infiniband/core/roce_gid_mgmt.c | 11 +- drivers/infiniband/hw/mlx5/main.c | 421 +++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 28 ++ .../net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 58 +++ include/linux/mlx5/driver.h | 22 +- include/linux/mlx5/mlx5_ifc.h | 31 +- include/linux/mlx5/vport.h | 4 + include/rdma/ib_verbs.h | 8 + 12 files changed, 550 insertions(+), 42 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index fc4022884dbb..e9a409d7f4e2 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -821,7 +821,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev) if (err) return err; - roce_rescan_device(ib_dev); + rdma_roce_rescan_device(ib_dev); return err; } diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 39e3c1d02613..39e4acdb025e 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -137,7 +137,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, int roce_gid_mgmt_init(void); void roce_gid_mgmt_cleanup(void); -void roce_rescan_device(struct ib_device *ib_dev); unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index ebfe45739ca7..5a52ec77940a 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -410,13 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rtnl_unlock(); } -/* This function will rescan all of the network devices in the system - * and add their gids, as needed, to the relevant RoCE devices. */ -void roce_rescan_device(struct ib_device *ib_dev) +/** + * rdma_roce_rescan_device - Rescan all of the network devices in the system + * and add their gids, as needed, to the relevant RoCE devices. 
+ * + * @device: the rdma device + */ +void rdma_roce_rescan_device(struct ib_device *ib_dev) { ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, enum_all_gids_of_dev_cb, NULL); } +EXPORT_SYMBOL(rdma_roce_rescan_device); static void callback_for_addr_gid_device_scan(struct ib_device *device, u8 port, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5fcb2ed94c11..4fbbe4c7a99b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -74,6 +74,23 @@ enum { MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, }; +static LIST_HEAD(mlx5_ib_unaffiliated_port_list); +static LIST_HEAD(mlx5_ib_dev_list); +/* + * This mutex should be held when accessing either of the above lists + */ +static DEFINE_MUTEX(mlx5_ib_multiport_mutex); + +struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) +{ + struct mlx5_ib_dev *dev; + + mutex_lock(&mlx5_ib_multiport_mutex); + dev = mpi->ibdev; + mutex_unlock(&mlx5_ib_multiport_mutex); + return dev; +} + static enum rdma_link_layer mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { @@ -120,7 +137,9 @@ static int mlx5_netdev_event(struct notifier_block *this, struct mlx5_ib_dev *ibdev; ibdev = roce->dev; - mdev = ibdev->mdev; + mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL); + if (!mdev) + return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: @@ -175,6 +194,7 @@ static int mlx5_netdev_event(struct notifier_block *this, break; } done: + mlx5_ib_put_native_port_mdev(ibdev, port_num); return NOTIFY_DONE; } @@ -183,10 +203,15 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, { struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; + struct mlx5_core_dev *mdev; + + mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL); + if (!mdev) + return NULL; - ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + ndev = mlx5_lag_get_roce_netdev(mdev); if (ndev) - return ndev; + goto out; /* Ensure ndev does not disappear before we invoke dev_hold() */ @@ -196,9 +221,70 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, dev_hold(ndev); read_unlock(&ibdev->roce[port_num - 1].netdev_lock); +out: + mlx5_ib_put_native_port_mdev(ibdev, port_num); return ndev; } +struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, + u8 ib_port_num, + u8 *native_port_num) +{ + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, + ib_port_num); + struct mlx5_core_dev *mdev = NULL; + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_port *port; + + if (native_port_num) + *native_port_num = 1; + + if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return ibdev->mdev; + + port = &ibdev->port[ib_port_num - 1]; + if (!port) + return NULL; + + spin_lock(&port->mp.mpi_lock); + mpi = ibdev->port[ib_port_num - 1].mp.mpi; + if (mpi && !mpi->unaffiliate) { + mdev = mpi->mdev; + /* If it's the master no need to refcount, it'll exist + * as long as the ib_dev exists. 
+ */ + if (!mpi->is_master) + mpi->mdev_refcnt++; + } + spin_unlock(&port->mp.mpi_lock); + + return mdev; +} + +void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num) +{ + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, + port_num); + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_port *port; + + if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return; + + port = &ibdev->port[port_num - 1]; + + spin_lock(&port->mp.mpi_lock); + mpi = ibdev->port[port_num - 1].mp.mpi; + if (mpi->is_master) + goto out; + + mpi->mdev_refcnt--; + if (mpi->unaffiliate) + complete(&mpi->unref_comp); +out: + spin_unlock(&port->mp.mpi_lock); +} + static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, u8 *active_width) { @@ -3160,12 +3246,11 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int get_port_caps(struct mlx5_ib_dev *dev) +static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - int port; struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); @@ -3186,22 +3271,21 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= dev->num_ports; port++) { - memset(pprops, 0, sizeof(*pprops)); - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - break; - } - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", - dprops->max_pkeys, pprops->gid_tbl_len); + memset(pprops, 0, sizeof(*pprops)); + err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); + if (err) { + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); + goto out; } + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; + mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", + port, dprops->max_pkeys, pprops->gid_tbl_len); + out: kfree(pprops); kfree(dprops); @@ -4054,8 +4138,203 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) return mlx5_get_vector_affinity(dev->mdev, comp_vector); } +/* The mlx5_ib_multiport_mutex should be held when calling this function */ +static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, + struct mlx5_ib_multiport_info *mpi) +{ + u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + struct mlx5_ib_port *port = &ibdev->port[port_num]; + int comps; + int err; + int i; + + spin_lock(&port->mp.mpi_lock); + if (!mpi->ibdev) { + spin_unlock(&port->mp.mpi_lock); + return; + } + mpi->ibdev = NULL; + + spin_unlock(&port->mp.mpi_lock); + mlx5_remove_netdev_notifier(ibdev, port_num); + spin_lock(&port->mp.mpi_lock); + + comps = mpi->mdev_refcnt; + if (comps) { + mpi->unaffiliate = true; + init_completion(&mpi->unref_comp); + spin_unlock(&port->mp.mpi_lock); + + for (i = 0; i < comps; i++) + wait_for_completion(&mpi->unref_comp); + + spin_lock(&port->mp.mpi_lock); + mpi->unaffiliate = false; + } + + port->mp.mpi = NULL; + + list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); + + spin_unlock(&port->mp.mpi_lock); + + err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev); + + mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1); + /* Log an error, still 
needed to cleanup the pointers and add + * it back to the list. + */ + if (err) + mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", + port_num + 1); + + ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; +} + +/* The mlx5_ib_multiport_mutex should be held when calling this function */ +static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, + struct mlx5_ib_multiport_info *mpi) +{ + u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + int err; + + spin_lock(&ibdev->port[port_num].mp.mpi_lock); + if (ibdev->port[port_num].mp.mpi) { + mlx5_ib_warn(ibdev, "port %d already affiliated.\n", + port_num + 1); + spin_unlock(&ibdev->port[port_num].mp.mpi_lock); + return false; + } + + ibdev->port[port_num].mp.mpi = mpi; + mpi->ibdev = ibdev; + spin_unlock(&ibdev->port[port_num].mp.mpi_lock); + + err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + + err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); + if (err) + goto unbind; + + err = mlx5_add_netdev_notifier(ibdev, port_num); + if (err) { + mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", + port_num + 1); + goto unbind; + } + + return true; + +unbind: + mlx5_ib_unbind_slave_port(ibdev, mpi); + return false; +} + +static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) +{ + int port_num = mlx5_core_native_port_num(dev->mdev) - 1; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, + port_num + 1); + struct mlx5_ib_multiport_info *mpi; + int err; + int i; + + if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return 0; + + err = mlx5_query_nic_vport_system_image_guid(dev->mdev, + &dev->sys_image_guid); + if (err) + return err; + + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + + mutex_lock(&mlx5_ib_multiport_mutex); + for (i = 0; i < dev->num_ports; i++) { + bool bound = false; + + /* build a stub multiport info struct for the native port. 
*/ + if (i == port_num) { + mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); + if (!mpi) { + mutex_unlock(&mlx5_ib_multiport_mutex); + mlx5_nic_vport_disable_roce(dev->mdev); + return -ENOMEM; + } + + mpi->is_master = true; + mpi->mdev = dev->mdev; + mpi->sys_image_guid = dev->sys_image_guid; + dev->port[i].mp.mpi = mpi; + mpi->ibdev = dev; + mpi = NULL; + continue; + } + + list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, + list) { + if (dev->sys_image_guid == mpi->sys_image_guid && + (mlx5_core_native_port_num(mpi->mdev) - 1) == i) { + bound = mlx5_ib_bind_slave_port(dev, mpi); + } + + if (bound) { + dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n"); + mlx5_ib_dbg(dev, "port %d bound\n", i + 1); + list_del(&mpi->list); + break; + } + } + if (!bound) { + get_port_caps(dev, i + 1); + mlx5_ib_dbg(dev, "no free port found for port %d\n", + i + 1); + } + } + + list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); + mutex_unlock(&mlx5_ib_multiport_mutex); + return err; +} + +static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) +{ + int port_num = mlx5_core_native_port_num(dev->mdev) - 1; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, + port_num + 1); + int i; + + if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return; + + mutex_lock(&mlx5_ib_multiport_mutex); + for (i = 0; i < dev->num_ports; i++) { + if (dev->port[i].mp.mpi) { + /* Destroy the native port stub */ + if (i == port_num) { + kfree(dev->port[i].mp.mpi); + dev->port[i].mp.mpi = NULL; + } else { + mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1); + mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi); + } + } + } + + mlx5_ib_dbg(dev, "removing from devlist\n"); + list_del(&dev->ib_dev_list); + mutex_unlock(&mlx5_ib_multiport_mutex); + + mlx5_nic_vport_disable_roce(dev->mdev); +} + static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); #endif @@ -4067,16 +4346,36 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; const char *name; int err; + int i; dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) return -ENOMEM; - err = get_port_caps(dev); + for (i = 0; i < dev->num_ports; i++) { + spin_lock_init(&dev->port[i].mp.mpi_lock); + rwlock_init(&dev->roce[i].netdev_lock); + } + + err = mlx5_ib_init_multiport_master(dev); if (err) goto err_free_port; + if (!mlx5_core_mp_enabled(mdev)) { + int i; + + for (i = 1; i <= dev->num_ports; i++) { + err = get_port_caps(dev, i); + if (err) + break; + } + } else { + err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); + } + if (err) + goto err_mp; + if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); @@ -4106,6 +4405,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) #endif return 0; +err_mp: + mlx5_ib_cleanup_multiport_master(dev); err_free_port: kfree(dev->port); @@ -4252,16 +4553,16 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num = 0; + u8 port_num; int err; int i; + port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { for (i = 0; i < dev->num_ports; i++) { - rwlock_init(&dev->roce[i].netdev_lock); dev->roce[i].dev = dev; 
dev->roce[i].native_port_num = i + 1; dev->roce[i].last_port_state = IB_PORT_DOWN; @@ -4292,8 +4593,9 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num = 0; + u8 port_num; + port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -4443,6 +4745,8 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); + static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, const struct mlx5_ib_profile *profile) { @@ -4457,7 +4761,8 @@ static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, return NULL; dev->mdev = mdev; - dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); + dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -4520,15 +4825,81 @@ static const struct mlx5_ib_profile pf_profile = { NULL), }; +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +{ + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_dev *dev; + bool bound = false; + int err; + + mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); + if (!mpi) + return NULL; + + mpi->mdev = mdev; + + err = mlx5_query_nic_vport_system_image_guid(mdev, + &mpi->sys_image_guid); + if (err) { + kfree(mpi); + return NULL; + } + + mutex_lock(&mlx5_ib_multiport_mutex); + list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { + if (dev->sys_image_guid == mpi->sys_image_guid) + bound = mlx5_ib_bind_slave_port(dev, mpi); + + if (bound) { + rdma_roce_rescan_device(&dev->ib_dev); + break; + } + } + + if (!bound) { + list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); + dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); + } else { + mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); + } + mutex_unlock(&mlx5_ib_multiport_mutex); + + return mpi; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { + enum rdma_link_layer ll; + int port_type_cap; + + port_type_cap = MLX5_CAP_GEN(mdev, port_type); + ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { + u8 port_num = mlx5_core_native_port_num(mdev) - 1; + + return mlx5_ib_add_slave_port(mdev, port_num); + } + return __mlx5_ib_add(mdev, &pf_profile); } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { - struct mlx5_ib_dev *dev = context; + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_dev *dev; + + if (mlx5_core_is_mp_slave(mdev)) { + mpi = context; + mutex_lock(&mlx5_ib_multiport_mutex); + if (mpi->ibdev) + mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); + list_del(&mpi->list); + mutex_unlock(&mlx5_ib_multiport_mutex); + return; + } + dev = context; __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6106dde35144..a70a4c02e396 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -654,8 +654,17 @@ struct mlx5_ib_counters { u16 set_id; }; +struct mlx5_ib_multiport_info; + +struct mlx5_ib_multiport { + struct mlx5_ib_multiport_info *mpi; + /* To be held when accessing the multiport info */ + spinlock_t mpi_lock; +}; + struct mlx5_ib_port { struct 
mlx5_ib_counters cnts; + struct mlx5_ib_multiport mp; }; struct mlx5_roce { @@ -756,6 +765,17 @@ struct mlx5_ib_profile { struct mlx5_ib_stage stage[MLX5_IB_STAGE_MAX]; }; +struct mlx5_ib_multiport_info { + struct list_head list; + struct mlx5_ib_dev *ibdev; + struct mlx5_core_dev *mdev; + struct completion unref_comp; + u64 sys_image_guid; + u32 mdev_refcnt; + bool is_master; + bool unaffiliate; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -800,6 +820,8 @@ struct mlx5_ib_dev { struct mutex lb_mutex; u32 user_td; u8 umr_fence; + struct list_head ib_dev_list; + u64 sys_image_guid; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1071,6 +1093,12 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn); +struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi); +struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, + u8 ib_port_num, + u8 *native_port_num); +void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, + u8 port_num); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c4392f741c5f..c841b03c3e48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -688,7 +688,7 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); - MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); @@ -727,7 +727,7 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) MLX5_SET(qpc, qpc, next_rcv_psn, MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); - MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); MLX5_SET(qpc, qpc, primary_address_path.udp_sport, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index d2a66dc4adc6..261b95d014a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -187,7 +187,7 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp MLX5_QP_ENHANCED_ULP_STATELESS_MODE); addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); - MLX5_SET(ads, addr_path, port, 1); + MLX5_SET(ads, addr_path, vhca_port_num, 1); MLX5_SET(ads, addr_path, grh, 1); ret = mlx5_core_create_qp(mdev, qp, in, inlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 916523103f16..9cb939b6a859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1121,3 +1121,61 @@ ex: return err; } 
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5_nic_vport_enable_roce(port_mdev); + if (err) + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (err) + mlx5_nic_vport_disable_roce(port_mdev); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport); + +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, 0); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, 0); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (!err) + mlx5_nic_vport_disable_roce(port_mdev); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 28733529f6ff..d5c787519e06 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1234,9 +1234,29 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) return !!(dev->priv.rl_table.max_size); } +static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) && + MLX5_CAP_GEN(dev, num_vhca_ports) <= 1; +} + +static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, num_vhca_ports) > 1; +} + +static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_mp_slave(dev) || + mlx5_core_is_mp_master(dev); +} + static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) { - return 1; + if (!mlx5_core_mp_enabled(dev)) + return 1; + + return MLX5_CAP_GEN(dev, native_port_num); } enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b1c81d7a86cb..7e88c8e7f374 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -502,7 +502,7 @@ struct mlx5_ifc_ads_bits { u8 dei_cfi[0x1]; u8 eth_prio[0x3]; u8 sl[0x4]; - u8 port[0x8]; + u8 vhca_port_num[0x8]; u8 rmac_47_32[0x10]; u8 rmac_31_0[0x20]; @@ -794,7 +794,10 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x80]; + u8 reserved_at_0[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x40]; u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; @@ -1066,8 +1069,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5f8[0x3]; u8 log_max_xrq[0x5]; - u8 reserved_at_600[0x1e]; - u8 sw_owner_id[0x1]; + u8 affiliate_nic_vport_criteria[0x8]; + u8 native_port_num[0x8]; + u8 num_vhca_ports[0x8]; + u8 reserved_at_618[0x6]; + u8 sw_owner_id[0x1]; u8 reserved_at_61f[0x1e1]; }; @@ -2617,7 +2623,12 @@ struct mlx5_ifc_nic_vport_context_bits { u8
event_on_mc_address_change[0x1]; u8 event_on_uc_address_change[0x1]; - u8 reserved_at_40[0xf0]; + u8 reserved_at_40[0xc]; + + u8 affiliation_criteria[0x4]; + u8 affiliated_vhca_id[0x10]; + + u8 reserved_at_60[0xd0]; u8 mtu[0x10]; @@ -3260,7 +3271,8 @@ struct mlx5_ifc_set_roce_address_in_bits { u8 op_mod[0x10]; u8 roce_address_index[0x10]; - u8 reserved_at_50[0x10]; + u8 reserved_at_50[0xc]; + u8 vhca_port_num[0x4]; u8 reserved_at_60[0x20]; @@ -3880,7 +3892,8 @@ struct mlx5_ifc_query_roce_address_in_bits { u8 op_mod[0x10]; u8 roce_address_index[0x10]; - u8 reserved_at_50[0x10]; + u8 reserved_at_50[0xc]; + u8 vhca_port_num[0x4]; u8 reserved_at_60[0x20]; }; @@ -5312,7 +5325,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x14]; + u8 reserved_at_0[0x12]; + u8 affiliation[0x1]; + u8 reserved_at_e[0x1]; u8 disable_uc_local_lb[0x1]; u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aaa0bb9e7655..64e193e87394 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -116,4 +116,8 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, struct mlx5_hca_vport_context *req); int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable); int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev); +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e44a8adac677..f25c03687ee9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3850,4 +3850,12 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) } +/** + * rdma_roce_rescan_device - Rescan all of the network devices in the system + * and add their gids, as needed, to the relevant RoCE devices. + * + * @device: the rdma device + */ +void rdma_roce_rescan_device(struct ib_device *ibdev); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From cfe4e37fdcacbc33176cfc2430df96355ee14489 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:41 +0200 Subject: {net, IB}/mlx5: Change set_roce_gid to take a port number When in dual port mode, setting a RoCE GID for any port flows through the master port's mlx5_core_dev. Provide an interface to set the port when sending this command. 
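A caller now names the port explicitly, as in the mlx5_ib hunk below (note that on a slave port dev->mdev is still the master's core device):

        err = mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
                                     roce_l3_type, gid->raw, mac,
                                     vlan, vlan_id, port_num);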
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 5 ++++- include/linux/mlx5/driver.h | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4791d747cc57..653b56377e69 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -475,7 +475,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, roce_l3_type, gid->raw, mac, vlan, - vlan_id); + vlan_id, port_num); } static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c841b03c3e48..e6175f8ac0e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -888,7 +888,8 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, MLX5_ROCE_VERSION_2, MLX5_ROCE_L3_TYPE_IPV6, - remote_ip, remote_mac, true, 0); + remote_ip, remote_mac, true, 0, + MLX5_FPGA_PORT_NUM); if (err) { mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); ret = ERR_PTR(err); @@ -954,7 +955,7 @@ err_cq: mlx5_fpga_conn_destroy_cq(conn); err_gid: mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, - NULL, false, 0); + NULL, false, 0, MLX5_FPGA_PORT_NUM); err_rsvd_gid: mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); err: @@ -982,7 +983,7 @@ void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) mlx5_fpga_conn_destroy_cq(conn); mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, - NULL, NULL, false, 0); + NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM); mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); kfree(conn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c index 573f59f46d41..7722a3f9bb68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, - const u8 *mac, bool vlan, u16 vlan_id) + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num) { #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; @@ -148,6 +148,9 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, memcpy(addr_l3_addr, gid, gidsz); } + if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) + MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); + MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d5c787519e06..9136e35f2f7e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1112,7 +1112,7 @@ void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); unsigned int 
mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, - const u8 *mac, bool vlan, u16 vlan_id); + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num); static inline int fw_initializing(struct mlx5_core_dev *dev) { -- cgit v1.2.3 From c4b76d8d95600d143f4d75f1bd700eb5d378099f Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:44 +0200 Subject: net/mlx5: Set num_vhca_ports capability Set the current capability to the max capability. Doing so enables dual port RoCE functionality if supported by the firmware. Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e382a3ca759e..d4a471a76d82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -557,6 +557,12 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + num_vhca_ports, + MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); -- cgit v1.2.3 From 852f6927594d0d3e8632c889b2ab38cbc46476ad Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 12 Jan 2018 07:58:40 +0200 Subject: IB/mlx4: Fix incorrectly releasing steerable UD QPs when have only ETH ports Allocating steerable UD QPs depends on having at least one IB port, while releasing those QPs does not. As a result, when there are only ETH ports, the IB (RoCE) driver requests releasing a qp range whose base qp is zero, with qp count zero. When SR-IOV is enabled, and the VF driver is running on a VM over a hypervisor which treats such qp release calls as errors (rather than NOPs), we see lines in the VM message log like: mlx4_core 0002:00:02.0: Failed to release qp range base:0 cnt:0 Fix this by adding a check for a zero count in mlx4_release_qp_range() (which thus treats releasing 0 qps as a nop), and eliminating the check for device managed flow steering when releasing steerable UD QPs. (Freeing ib_uc_qpns_bitmap unconditionally is also OK, since it remains NULL when steerable UD QPs are not allocated). 
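With the zero-count guard in place the call sites may release unconditionally; in sketch form:

        /* steer_qpn_base and steer_qpn_count are both 0 when no steerable
         * UD QPs were allocated (ETH-only device), so this is now a nop
         * there instead of an error */
        mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
                              ibdev->steer_qpn_count);
        kfree(ibdev->ib_uc_qpns_bitmap);        /* kfree(NULL) is a nop too */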
Cc: <stable@vger.kernel.org> Fixes: 4196670be786 ("IB/mlx4: Don't allocate range of steerable UD QPs for Ethernet-only device") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 13 +++++-------- drivers/net/ethernet/mellanox/mlx4/qp.c | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5695ce53fddb..8d2ee9322f2e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3001,9 +3001,8 @@ err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); err_counter: for (i = 0; i < ibdev->num_ports; ++i) mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); @@ -3108,11 +3107,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->iboe.nb.notifier_call = NULL; } - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); - kfree(ibdev->ib_uc_qpns_bitmap); - } + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); + kfree(ibdev->ib_uc_qpns_bitmap); iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 769598f7b6c8..3aaf4bad6c5a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -287,6 +287,9 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) u64 in_param = 0; int err; + if (!cnt) + return; + if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, base_qpn); set_param_h(&in_param, cnt); -- cgit v1.2.3 From 24d33d2c8e92abffe1f0653d42fc65b8f164a6d9 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Tue, 16 Jan 2018 20:08:40 +0200 Subject: net/mlx5e: Add clock info page to mlx5 core devices Add a new page to mlx5 core containing clock info data that allows user level applications to translate a cqe timestamp to nanoseconds. The information stored in this page is represented through struct mlx5_ib_clock_info. In order to synchronize between kernel and user space a sequence number is incremented at the beginning and end of each update. An odd number means the data is being updated; an even number means the update has completed. 
To guarantee that the data structure was accessed atomically, user space will:

repeat:
        seq1 = read the sequence number
        goto repeat while seq1 is odd
        read the clock info data
        seq2 = read the sequence number
        if seq1 != seq2 goto repeat

Reviewed-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Vesker Signed-off-by: Feras Daoud Signed-off-by: Eitan Rabin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 55 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 3 ++ include/uapi/rdma/mlx5-abi.h | 16 +++++++ 3 files changed, 74 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index fa8aed62b231..4b6cb9b38686 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -31,6 +31,8 @@ */ #include <linux/clocksource.h> +#include <linux/highmem.h> +#include <rdma/mlx5-abi.h> #include "en.h" enum { @@ -71,6 +73,28 @@ static u64 read_internal_timer(const struct cyclecounter *cc) return mlx5_read_internal_timer(mdev) & cc->mask; } +static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = &mdev->clock; + u32 sign; + + if (!clock_info) + return; + + sign = smp_load_acquire(&clock_info->sign); + smp_store_mb(clock_info->sign, + sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING); + + clock_info->cycles = clock->tc.cycle_last; + clock_info->mult = clock->cycles.mult; + clock_info->nsec = clock->tc.nsec; + clock_info->frac = clock->tc.frac; + + smp_store_release(&clock_info->sign, + sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2); +} + static void mlx5_pps_out(struct work_struct *work) { struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, @@ -109,6 +133,7 @@ static void mlx5_timestamp_overflow(struct work_struct *work) write_lock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); + mlx5_update_clock_info_page(clock->mdev); write_unlock_irqrestore(&clock->lock, flags); schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } @@ -123,6 +148,7 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, write_lock_irqsave(&clock->lock, flags); timecounter_init(&clock->tc, &clock->cycles, ns); + mlx5_update_clock_info_page(clock->mdev); write_unlock_irqrestore(&clock->lock, flags); return 0; @@ -152,6 +178,7 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) write_lock_irqsave(&clock->lock, flags); timecounter_adjtime(&clock->tc, delta); + mlx5_update_clock_info_page(clock->mdev); write_unlock_irqrestore(&clock->lock, flags); return 0; @@ -179,6 +206,7 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? 
clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; + mlx5_update_clock_info_page(clock->mdev); write_unlock_irqrestore(&clock->lock, flags); return 0; @@ -470,6 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->cycles.shift); clock->nominal_c_mult = clock->cycles.mult; clock->cycles.mask = CLOCKSOURCE_MASK(41); + clock->mdev = mdev; timecounter_init(&clock->tc, &clock->cycles, ktime_to_ns(ktime_get_real())); @@ -482,6 +511,25 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) do_div(ns, NSEC_PER_SEC / 2 / HZ); clock->overflow_period = ns; + mdev->clock_info_page = alloc_page(GFP_KERNEL); + if (mdev->clock_info_page) { + mdev->clock_info = kmap(mdev->clock_info_page); + if (!mdev->clock_info) { + __free_page(mdev->clock_info_page); + mlx5_core_warn(mdev, "failed to map clock page\n"); + } else { + mdev->clock_info->sign = 0; + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = + clock->overflow_period; + } + } + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); if (clock->overflow_period) @@ -521,5 +569,12 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) cancel_work_sync(&clock->pps_info.out_work); cancel_delayed_work_sync(&clock->overflow_work); + + if (mdev->clock_info) { + kunmap(mdev->clock_info_page); + __free_page(mdev->clock_info_page); + mdev->clock_info = NULL; + } + kfree(clock->ptp_info.pin_config); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9136e35f2f7e..c403151133e9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -797,6 +797,7 @@ struct mlx5_clock { u32 nominal_c_mult; unsigned long overflow_period; struct delayed_work overflow_work; + struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; struct mlx5_pps pps_info; @@ -844,6 +845,8 @@ struct mlx5_core_dev { struct cpu_rmap *rmap; #endif struct mlx5_clock clock; + struct mlx5_ib_clock_info *clock_info; + struct page *clock_info_page; }; struct mlx5_db { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index f6d319dfc7bf..0299deed71a2 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -381,4 +381,20 @@ struct mlx5_ib_modify_wq { __u32 comp_mask; __u32 reserved; }; + +struct mlx5_ib_clock_info { + __u32 sign; + __u32 resv; + __u64 nsec; + __u64 cycles; + __u64 frac; + __u32 mult; + __u32 shift; + __u64 mask; + __u64 overflow_period; +}; + +enum { + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING = 1, +}; #endif /* MLX5_ABI_USER_H */ -- cgit v1.2.3
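For reference, a user-space consumer of the clock info page added in the last commit above would follow the sequence protocol along these lines. This is an illustrative sketch only: the helper name cqe_ts_to_ns() is invented here, the page is assumed to be already mmap'ed so that info points at the kernel's struct mlx5_ib_clock_info (from the installed uapi header <rdma/mlx5-abi.h>), overflow of delta * mult for very stale timestamps is not handled, and __sync_synchronize() is used as a generic read barrier:

        #include <stdint.h>
        #include <rdma/mlx5-abi.h>

        static uint64_t cqe_ts_to_ns(volatile struct mlx5_ib_clock_info *info,
                                     uint64_t cqe_ts)
        {
                uint64_t cycles, nsec, frac, mask, delta;
                uint32_t seq1, seq2, mult, shift;

        repeat:
                seq1 = info->sign;
                __sync_synchronize();           /* read barrier */
                if (seq1 & MLX5_IB_CLOCK_INFO_KERNEL_UPDATING)
                        goto repeat;            /* kernel update in flight */
                cycles = info->cycles;
                nsec   = info->nsec;
                frac   = info->frac;
                mult   = info->mult;
                shift  = info->shift;
                mask   = info->mask;
                __sync_synchronize();
                seq2 = info->sign;
                if (seq1 != seq2)
                        goto repeat;            /* raced with an update */

                /* same math as the kernel's timecounter_cyc2time() */
                delta = (cqe_ts - cycles) & mask;
                return nsec + ((delta * mult + frac) >> shift);
        }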