summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-24 05:45:06 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-24 05:45:06 +0300
commit048ccca8c1c8f583deec3367d7df521bb1f542ae (patch)
treeefa882c88f658f711d63581a3063203c63682338 /drivers/infiniband/hw/mlx4
parentb3e27d5d4a29bcc8e057b496d5ef5194addaaac0 (diff)
parent34356f64ac0df2326fa50e2d4bca6f7c03ed16c1 (diff)
downloadlinux-048ccca8c1c8f583deec3367d7df521bb1f542ae.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "Initial roundup of 4.5 merge window patches - Remove usage of ib_query_device and instead store attributes in ib_device struct - Move iopoll out of block and into lib, rename to irqpoll, and use in several places in the rdma stack as our new completion queue polling library mechanism. Update the other block drivers that already used iopoll to use the new mechanism too. - Replace the per-entry GID table locks with a single GID table lock - IPoIB multicast cleanup - Cleanups to the IB MR facility - Add support for 64bit extended IB counters - Fix for netlink oops while parsing RDMA nl messages - RoCEv2 support for the core IB code - mlx4 RoCEv2 support - mlx5 RoCEv2 support - Cross Channel support for mlx5 - Timestamp support for mlx5 - Atomic support for mlx5 - Raw QP support for mlx5 - MAINTAINERS update for mlx4/mlx5 - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates - Add support for remote invalidate to the iSER driver (pushed through the RDMA tree due to dependencies, acknowledged by nab) - Update to NFSoRDMA (pushed through the RDMA tree due to dependencies, acknowledged by Bruce)" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (169 commits) IB/mlx5: Unify CQ create flags check IB/mlx5: Expose Raw Packet QP to user space consumers {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib IB/mlx5: Support setting Ethernet priority for Raw Packet QPs IB/mlx5: Add Raw Packet QP query functionality IB/mlx5: Add create and destroy functionality for Raw Packet QP IB/mlx5: Refactor mlx5_ib_qp to accommodate other QP types IB/mlx5: Allocate a Transport Domain for each ucontext net/mlx5_core: Warn on unsupported events of QP/RQ/SQ net/mlx5_core: Add RQ and SQ event handling net/mlx5_core: Export transport objects IB/mlx5: Expose CQE version to user-space IB/mlx5: Add CQE version 1 support to user QPs and SRQs IB/mlx5: Fix data validation in mlx5_ib_alloc_ucontext IB/sa: Fix netlink local service GFP crash IB/srpt: Remove redundant wc array IB/qib: Improve ipoib UD performance IB/mlx4: Advertise RoCE v2 support IB/mlx4: Create and use another QP1 for RoCEv2 IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers ...
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c102
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h10
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c22
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c318
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c3
7 files changed, 337 insertions, 124 deletions
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 86af71351d9a..105246fba2e7 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
- memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+ eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
if (is_vlan_dev(gid_attr.ndev))
vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index b88fc8f5ab18..9f8b516eb2b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -811,9 +811,6 @@ repoll:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX4_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 97d6878f9938..1c7ab6cabbb8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
return dev;
}
-static int mlx4_ib_update_gids(struct gid_entry *gids,
- struct mlx4_ib_dev *ibdev,
- u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return err;
}
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ struct {
+ union ib_gid gid;
+ __be32 rsrvd1[2];
+ __be16 rsrvd2;
+ u8 type;
+ u8 version;
+ __be32 rsrvd3;
+ } *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+ if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ gid_tbl[i].version = 2;
+ if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+ gid_tbl[i].type = 1;
+ else
+ memset(&gid_tbl[i].gid, 0, 12);
+ }
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+ return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+ (port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ gids[i].gid_type = port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
int i;
int ret;
unsigned long flags;
+ struct ib_gid_attr attr;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
if (ret)
return ret;
+ if (attr.ndev)
+ dev_put(attr.ndev);
+
if (!memcmp(&gid, &zgid, sizeof(gid)))
return -EINVAL;
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+ attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- else
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ }
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err) {
+ goto err_notif;
+ }
+ }
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1caa11edac03..52ce7b000044 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
unsigned tail;
};
+enum {
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+ /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -478,6 +485,7 @@ struct gid_cache_context {
struct gid_entry {
union ib_gid gid;
+ enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 4d1e1c632603..242b94ec105b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -366,28 +366,6 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct ib_bind_mw_wr wr;
- struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.opcode = IB_WR_BIND_MW;
- wr.wr.wr_id = mw_bind->wr_id;
- wr.wr.send_flags = mw_bind->send_flags;
- wr.mw = mw;
- wr.bind_info = mw_bind->bind_info;
- wr.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
- if (!ret)
- mw->rkey = wr.rkey;
-
- return ret;
-}
-
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 13eaaf45288f..bc5536f00b6c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,6 +32,8 @@
*/
#include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
};
enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
- [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
}
- return proxy_sqp;
+ if (proxy_sqp)
+ return 1;
+
+ return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}
/* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+ qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+ qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = NULL;
int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
- init_attr->qp_type != IB_QPT_UD) ||
- ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
- init_attr->qp_type > IB_QPT_GSI)))
- return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags) {
+ if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+ return ERR_PTR(-EINVAL);
+
+ if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+ init_attr->qp_type > IB_QPT_GSI) ||
+ (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+ init_attr->qp_type != IB_QPT_GSI))
+ return ERR_PTR(-EINVAL);
+ }
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ int sqpn;
+
/* Userspace is not allowed to create special QPs: */
if (udata)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+ if (res)
+ return ERR_PTR(res);
+ } else {
+ sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+ }
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- get_sqp_num(to_mdev(pd->device), init_attr),
+ sqpn,
&qp, gfp);
if (err)
return ERR_PTR(err);
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+ init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
break;
}
default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return &qp->ibqp;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata) {
+ struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+ struct ib_qp *ibqp;
+ struct mlx4_ib_dev *dev = to_mdev(device);
+
+ ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+ if (!IS_ERR(ibqp) &&
+ (init_attr->qp_type == IB_QPT_GSI) &&
+ !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+ struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+ if (is_eth &&
+ dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+ if (IS_ERR(sqp->roce_v2_gsi)) {
+ pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ sqp->roce_v2_gsi = NULL;
+ } else {
+ sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+ sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ }
+
+ init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ }
+ }
+ return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+ if (sqp->roce_v2_gsi)
+ ib_destroy_qp(sqp->roce_v2_gsi);
+ }
+
+ return _mlx4_ib_destroy_qp(qp);
+}
+
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
return 0;
}
+enum {
+ MLX4_QPC_ROCE_MODE_1 = 0,
+ MLX4_QPC_ROCE_MODE_2 = 2,
+ MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+ switch (gid_type) {
+ case IB_GID_TYPE_ROCE:
+ return MLX4_QPC_ROCE_MODE_1;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ return MLX4_QPC_ROCE_MODE_2;
+ default:
+ return MLX4_QPC_ROCE_MODE_UNDEFINED;
+ }
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+ IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH;
- if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH) {
+ if (is_eth) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+ if (is_eth &&
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+ if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+ err = -EINVAL;
+ goto out;
+ }
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
+
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->rlkey |= (1 << 4);
+ context->rlkey_roce_mode |= (1 << 4);
/*
* Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
return err;
}
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
return err;
}
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ int ret;
+
+ ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ int err = 0;
+
+ if (sqp->roce_v2_gsi)
+ err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+ if (err)
+ pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+ err);
+ }
+ return ret;
+}
+
static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
- ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
- int i;
-
- for (i = ETH_ALEN; i; i--) {
- dst_mac[i - 1] = src_mac & 0xff;
- src_mac >>= 8;
- }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
bool is_eth;
bool is_vlan = false;
bool is_grh;
+ bool is_udp = false;
+ int ip_version = 0;
send_size = 0;
for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+ struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid,
- NULL);
- if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- if (err)
+ &gid_attr);
+ if (!err) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
+ }
+ if (!err) {
+ is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+ ip_version = 4;
+ else
+ ip_version = 6;
+ is_grh = false;
+ }
+ } else {
return err;
+ }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+ err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &sqp->ud_header);
+ if (err)
+ return err;
if (!is_eth) {
sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
- if (is_grh) {
+ if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
ah->av.ib.dgid, 16);
}
+ if (ip_version == 4) {
+ sqp->ud_header.ip4.tos =
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.ip4.id = 0;
+ sqp->ud_header.ip4.frag_off = htons(IP_DF);
+ sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+ memcpy(&sqp->ud_header.ip4.saddr,
+ sgid.raw + 12, 4);
+ memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+ sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+ }
+
+ if (is_udp) {
+ sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+ sqp->ud_header.udp.csum = 0;
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (is_eth) {
struct in6_addr in6;
-
+ u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+ ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
mlx->sched_prio = cpu_to_be16(pcp);
+ ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
- /* FIXME: cache smac value? */
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
- u8 smac[ETH_ALEN];
-
- mlx4_u64_to_smac(smac, mac);
- memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
- } else {
- /* use the src mac of the tunnel */
- memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
- }
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.eth.type = cpu_to_be16(ether_type);
} else {
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
- struct ib_bind_mw_wr *wr)
-{
- bseg->flags1 =
- convert_access(wr->bind_info.mw_access_flags) &
- cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
- MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
- MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
- bseg->flags2 = 0;
- if (wr->mw->type == IB_MW_TYPE_2)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->rkey);
- bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->bind_info.addr);
- bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+ if (sqp->roce_v2_gsi) {
+ struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+ struct ib_gid_attr gid_attr;
+ union ib_gid gid;
+
+ if (!ib_get_cached_gid(ibqp->device,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &gid,
+ &gid_attr)) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ } else {
+ pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+ ah->av.ib.gid_index);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
- case IB_WR_BIND_MW:
- ctrl->srcrb_flags |=
- cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, bind_mw_wr(wr));
- wqe += sizeof(struct mlx4_wqe_bind_seg);
- size += sizeof(struct mlx4_wqe_bind_seg) / 16;
- break;
default:
/* No extra segments required for sends */
break;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index c394376ebe06..0597f3eef5d0 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
- srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+ GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);